31 #ifndef SPECIAL_PURPOSE_IMPL_H_ 32 #define SPECIAL_PURPOSE_IMPL_H_ 34 #include <shogun/lib/config.h> 43 #endif // HAVE_VIENNACL 51 namespace implementation
54 namespace special_purpose
// Element-wise logistic function. Primary template first, then the
// backend-specific variants that are visible in this excerpt.
// NOTE(review): this listing has elided lines (struct headers, braces and
// parts of the bodies are missing) -- confirm details against the full header.
60 template <Backend backend,
class Matrix>
// T abbreviates the matrix's scalar element type.
64 typedef typename Matrix::Scalar
T;
// Declared here only; takes the input matrix A and a second matrix named result.
67 static void compute(Matrix A, Matrix result);
// Variant whose backend tag is not visible in this excerpt.
72 template <
class Matrix>
76 typedef typename Matrix::Scalar
T;
// Walks indices 0..len-1; the loop body is not shown in this excerpt.
82 for (int32_t i=0; i<len; i++)
// ViennaCL variant: the work is handed to an element-wise OpenCL kernel.
90 template <
class Matrix>
91 struct logistic<Backend::VIENNACL, Matrix>
94 typedef typename Matrix::Scalar
T;
// Per-element expression given to the kernel generator: 1 / (1 + exp(-element)).
99 const std::string operation =
"return 1.0/(1+exp(-1*element));";
// The scalar type string is appended, so the kernel name differs per T.
101 std::string kernel_name =
"logistic_" + ocl::get_type_string<T>();
// Ask the helper for a single-argument element-wise kernel built from the
// name and expression above.
102 viennacl::ocl::kernel& kernel =
103 ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
// Global size along dimension 0 is derived from the total element count
// (num_rows*num_cols) via align_to_multiple_1d.
105 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Kernel arguments, in order: A's buffer, element count, A's offset,
// result's buffer, result's offset.
107 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
108 cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
109 result.vcl_matrix(), cl_int(result.offset)));
113 #endif // HAVE_VIENNACL 118 template <Backend backend,
class Matrix>
// Primary template of the operation that scales C element-wise by
// A*(1-A). The operation name used in this comment block is taken from the
// kernel-name string further down ("multiply_by_logistic_derivative_").
// NOTE(review): struct headers and braces are elided in this listing.
122 typedef typename Matrix::Scalar
T;
// Declared here only; takes matrices A and C.
125 static void compute(Matrix A, Matrix C);
// Variant whose backend tag is not visible in this excerpt.
130 template <
class Matrix>
134 typedef typename Matrix::Scalar
T;
// In-place update over indices 0..len-1: C[i] = C[i] * A[i] * (1 - A[i]).
// Presumably A holds logistic outputs, which makes the factor the logistic
// derivative -- confirm against the caller.
140 for (int32_t i=0; i<len; i++)
141 C[i] *= A[i] * (1.0-A[i]);
// Variant that dispatches through ViennaCL's OpenCL kernel API.
148 template <
class Matrix>
152 typedef typename Matrix::Scalar
T;
// Per-element expression of two inputs: element2 * element1 * (1 - element1).
157 const std::string operation =
"return element2 * element1*(1.0-element1);";
// The scalar type string is appended, so the kernel name differs per T.
159 std::string kernel_name =
"multiply_by_logistic_derivative_" + ocl::get_type_string<T>();
// Ask the helper for a two-argument element-wise kernel built from the name
// and expression above.
160 viennacl::ocl::kernel& kernel =
161 ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
// Global size along dimension 0 is derived from A's total element count.
163 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Kernel arguments, in order: A's buffer, element count, A's offset, then
// C's buffer and offset twice. C is therefore passed both as an input and as
// the last buffer argument (presumably the output, i.e. C is updated in
// place -- confirm against the kernel generator).
165 viennacl::ocl::enqueue(kernel(
166 A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
167 C.vcl_matrix(), cl_int(C.offset),
168 C.vcl_matrix(), cl_int(C.offset)));
172 #endif // HAVE_VIENNACL 177 template <Backend backend,
class Matrix>
// Primary template of the rectified-linear operation (name taken from the
// kernel-name string further down, "rectified_linear_").
// NOTE(review): struct headers and braces are elided in this listing.
181 typedef typename Matrix::Scalar
T;
// Declared here only; takes the input matrix A and a second matrix named result.
184 static void compute(Matrix A, Matrix result);
// Variant whose backend tag is not visible in this excerpt.
189 template <
class Matrix>
193 typedef typename Matrix::Scalar
T;
// Walks indices 0..len-1; the loop body is not shown in this excerpt.
199 for (int32_t i=0; i<len; i++)
// Variant that dispatches through ViennaCL's OpenCL kernel API.
207 template <
class Matrix>
211 typedef typename Matrix::Scalar
T;
// Per-element expression: the larger of 0 (cast to DATATYPE) and the element.
216 const std::string operation =
"return max((DATATYPE)0,element);";
// The scalar type string is appended, so the kernel name differs per T.
218 std::string kernel_name =
"rectified_linear_" + ocl::get_type_string<T>();
// Ask the helper for a single-argument element-wise kernel built from the
// name and expression above.
219 viennacl::ocl::kernel& kernel =
220 ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
// Global size along dimension 0 is derived from A's total element count.
222 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Kernel arguments, in order: A's buffer, element count, A's offset,
// result's buffer, result's offset.
224 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
225 cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
226 result.vcl_matrix(), cl_int(result.offset)));
230 #endif // HAVE_VIENNACL 235 template <Backend backend,
class Matrix>
// Primary template of the operation named (per the kernel-name string further
// down) "multiply_by_rectified_linear_derivative".
// NOTE(review): struct headers and braces are elided in this listing.
239 typedef typename Matrix::Scalar
T;
// Declared here only; takes matrices A and C.
242 static void compute(Matrix A, Matrix C);
// Variant whose backend tag is not visible in this excerpt.
247 template <
class Matrix>
251 typedef typename Matrix::Scalar
T;
// Walks indices 0..len-1; the loop body is not shown in this excerpt.
257 for (int32_t i=0; i<len; i++)
// Variant that dispatches through ViennaCL's OpenCL kernel API.
266 template <
class Matrix>
270 typedef typename Matrix::Scalar
T;
// Per-element expression of two inputs: yields 0 where element1 is exactly 0
// and element2 otherwise.
275 const std::string operation =
"return element1==0 ? 0 : element2;";
// The scalar type string is appended, so the kernel name differs per T.
277 std::string kernel_name =
"multiply_by_rectified_linear_derivative_" + ocl::get_type_string<T>();
// Ask the helper for a two-argument element-wise kernel built from the name
// and expression above.
278 viennacl::ocl::kernel& kernel =
279 ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
// Global size along dimension 0 is derived from A's total element count.
281 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Kernel arguments, in order: A's buffer, element count, A's offset, then
// C's buffer and offset twice. Presumably element1 maps to A, element2 to C,
// and the final C buffer is the output (in-place update of C) -- confirm
// against the kernel generator.
283 viennacl::ocl::enqueue(kernel(
284 A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
285 C.vcl_matrix(), cl_int(C.offset),
286 C.vcl_matrix(), cl_int(C.offset)));
290 #endif // HAVE_VIENNACL 295 template <Backend backend,
class Matrix>
// Primary template of the softmax operation (name taken from the struct
// header and kernel-name string further down).
// NOTE(review): struct headers, braces and loop bodies are elided in this
// listing -- confirm details against the full header.
299 typedef typename Matrix::Scalar
T;
// Variant whose backend tag is not visible in this excerpt.
309 template <
class Matrix>
313 typedef typename Matrix::Scalar
T;
// Three loops are visible: one over the columns of A (j) and two over its
// rows (i and k). Their nesting and bodies are not shown in this excerpt.
327 for (int32_t j=0; j<A.
num_cols; j++)
330 for (int32_t i=0; i<A.
num_rows; i++)
334 for (int32_t k=0; k<A.
num_rows; k++)
// ViennaCL variant: uses a hand-written OpenCL kernel.
343 template <
class Matrix>
344 struct softmax<Backend::VIENNACL, Matrix>
347 typedef typename Matrix::Scalar
T;
// Returns the kernel for scalar type T, reusing an existing one when a kernel
// with the same name is already registered.
351 static viennacl::ocl::kernel& generate_kernel()
// The scalar type string is appended, so the kernel name differs per T.
353 std::string kernel_name =
"softmax_" + ocl::get_type_string<T>();
// Reuse path: an already-known kernel is returned without recompiling.
355 if (ocl::kernel_exists(kernel_name))
356 return ocl::get_kernel(kernel_name);
// Kernel source starts with the preamble produced for this name and type.
358 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
// The next line holds the collapsed kernel text followed by host code.
// Kernel: work item j = get_global_id(0) handles column j of A, addressed as
// A[offset + i + j*nrows]. It finds the column maximum, sums
// exp(value - col_max), takes normalizer = log(col_sum), and overwrites each
// entry with exp(value - col_max - normalizer), so A is modified in place.
// Any bounds check on j against ncols is not visible in this excerpt.
// Host code: compiles the kernel under kernel_name, sets the local work size
// to OCL_WORK_GROUP_SIZE_1D, and (in the caller) fetches it via
// generate_kernel<T>().
362 __kernel void KERNEL_NAME( 363 __global DATATYPE* A, int nrows, int ncols, int offset) 365 int j = get_global_id(0); 370 DATATYPE col_max = -INFINITY; 371 for (int i=0; i<nrows; i++) 372 col_max = max(col_max, A[offset + i+j*nrows]); 374 DATATYPE col_sum = 0; 375 for (int i=0; i<nrows; i++) 376 col_sum += exp(A[offset + i+j*nrows]-col_max); 378 DATATYPE normalizer = log(col_sum); 379 for (int i=0; i<nrows; i++) 381 int index = offset + i+j*nrows; 382 A[index] = exp(A[index]-col_max-normalizer); 388 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source); 390 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D); 400 viennacl::ocl::kernel& kernel = generate_kernel<T>();
// Global size along dimension 0 is derived from the number of columns,
// matching the kernel's one-work-item-per-column indexing.
401 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_cols));
// Kernel arguments, in order: A's buffer, row count, column count, A's offset.
403 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
404 cl_int(A.num_rows), cl_int(A.num_cols), cl_int(A.offset)));
408 #endif // HAVE_VIENNACL 413 template <Backend backend,
class Matrix>
// Primary template of the cross-entropy operation (name taken from the
// kernel-name string further down, "cross_entropy_").
// NOTE(review): struct headers, braces and some statements are elided in this
// listing -- confirm details against the full header.
417 typedef typename Matrix::Scalar
T;
// Declared here only; takes matrices P and Q and returns a scalar of type T.
422 static T
compute(Matrix P, Matrix Q);
// Variant whose backend tag is not visible in this excerpt; it works on
// objects named P_eig and Q_eig whose construction is not shown.
426 template <
class Matrix>
430 typedef typename Matrix::Scalar
T;
// Result: -sum( P * log(Q + 1e-30) ), taken coefficient-wise. The 1e-30 term
// is added to Q before the logarithm (presumably to avoid log(0) -- confirm).
443 return -1*(P_eig.array() * (Q_eig.array()+1e-30).log()).
sum();
// Variant that uses a hand-written OpenCL kernel through ViennaCL.
449 template <
class Matrix>
453 typedef typename Matrix::Scalar
T;
// Returns the kernel for scalar type T, reusing an existing one when a kernel
// with the same name is already registered.
457 static viennacl::ocl::kernel& generate_kernel()
// The scalar type string is appended, so the kernel name differs per T.
459 std::string kernel_name =
"cross_entropy_" + ocl::get_type_string<T>();
// Reuse path: an already-known kernel is returned without recompiling.
461 if (ocl::kernel_exists(kernel_name))
462 return ocl::get_kernel(kernel_name);
// Kernel source starts with the preamble produced for this name and type.
464 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
// The next line holds the collapsed kernel text followed by host code.
// Kernel: each work item starts at its local id and steps through the `size`
// elements in strides of WORK_GROUP_SIZE_1D, accumulating
// p[i+p_offset]*log(q[i+q_offset]+1e-30) into thread_sum, which it stores in
// the __local array `buffer`. A loop with stride j halving from
// WORK_GROUP_SIZE_1D/2 then adds buffer[local_id + j] into buffer[local_id],
// with a local-memory barrier per step (any guard on local_id is not visible
// in this excerpt). Finally the work item with global id 0 writes
// -1*buffer[0] to *result.
// Host code: compiles the kernel and sets both the local and the global work
// size to OCL_WORK_GROUP_SIZE_1D; the caller then fetches it via
// generate_kernel<T>().
468 __kernel void KERNEL_NAME( 469 __global DATATYPE* p, int size, int p_offset, 470 __global DATATYPE* q, int q_offset, 471 __global DATATYPE* result) 473 __local DATATYPE buffer[WORK_GROUP_SIZE_1D]; 475 int local_id = get_local_id(0); 477 DATATYPE thread_sum = 0; 478 for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D) 479 thread_sum += p[i+p_offset]*log(q[i+q_offset]+1e-30); 481 buffer[local_id] = thread_sum; 483 for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1) 485 barrier(CLK_LOCAL_MEM_FENCE); 487 buffer[local_id] += buffer[local_id + j]; 490 barrier(CLK_LOCAL_MEM_FENCE); 492 if (get_global_id(0)==0) 493 *result = -1*buffer[0]; 498 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source); 500 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D); 501 kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D); 511 viennacl::ocl::kernel& kernel = generate_kernel<T>();
// One-element GPU vector that the kernel writes its scalar result into.
513 CGPUVector<T> result(1);
// Kernel arguments, in order: P's buffer, element count (num_rows*num_cols),
// P's offset, Q's buffer, Q's offset, the result buffer. How the value is
// read back and returned is not shown in this excerpt.
515 viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
516 cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
517 Q.vcl_matrix(), cl_int(Q.offset),
518 result.vcl_vector()));
523 #endif // HAVE_VIENNACL 528 template <Backend backend,
class Matrix>
// Primary template of the squared-error operation (name taken from the
// kernel-name string further down, "squared_error_").
// NOTE(review): struct headers, braces and some statements are elided in this
// listing -- confirm details against the full header.
532 typedef typename Matrix::Scalar
T;
// Declared here only; takes matrices P and Q and returns a scalar of type T.
537 static T
compute(Matrix P, Matrix Q);
// Variant whose backend tag is not visible in this excerpt; it works on
// objects named P_eig and Q_eig whose construction is not shown.
541 template <
class Matrix>
545 typedef typename Matrix::Scalar
T;
// Result: half the sum of the squared coefficient-wise differences,
// 0.5 * sum( (P - Q)^2 ).
558 return 0.5 * (P_eig - Q_eig).array().square().sum();
// Variant that uses a hand-written OpenCL kernel through ViennaCL.
564 template <
class Matrix>
568 typedef typename Matrix::Scalar
T;
// Returns the kernel for scalar type T, reusing an existing one when a kernel
// with the same name is already registered.
572 static viennacl::ocl::kernel& generate_kernel()
// The scalar type string is appended, so the kernel name differs per T.
574 std::string kernel_name =
"squared_error_" + ocl::get_type_string<T>();
// Reuse path: an already-known kernel is returned without recompiling.
576 if (ocl::kernel_exists(kernel_name))
577 return ocl::get_kernel(kernel_name);
// Kernel source starts with the preamble produced for this name and type.
579 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
// The next line holds the collapsed kernel text followed by host code.
// Kernel: each work item starts at its local id and steps through the `size`
// elements in strides of WORK_GROUP_SIZE_1D, accumulating
// pown(p[i+p_offset]-q[i+q_offset], 2) into thread_sum, which it stores in
// the __local array `buffer`. A loop with stride j halving from
// WORK_GROUP_SIZE_1D/2 then adds buffer[local_id + j] into buffer[local_id],
// with a local-memory barrier per step (any guard on local_id is not visible
// in this excerpt). Finally the work item with global id 0 writes
// 0.5*buffer[0] to *result.
// Host code: compiles the kernel and sets both the local and the global work
// size to OCL_WORK_GROUP_SIZE_1D; the caller then fetches it via
// generate_kernel<T>().
583 __kernel void KERNEL_NAME( 584 __global DATATYPE* p, int size, int p_offset, 585 __global DATATYPE* q, int q_offset, 586 __global DATATYPE* result) 588 __local DATATYPE buffer[WORK_GROUP_SIZE_1D]; 590 int local_id = get_local_id(0); 592 DATATYPE thread_sum = 0; 593 for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D) 594 thread_sum += pown(p[i+p_offset]-q[i+q_offset], 2); 596 buffer[local_id] = thread_sum; 598 for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1) 600 barrier(CLK_LOCAL_MEM_FENCE); 602 buffer[local_id] += buffer[local_id + j]; 605 barrier(CLK_LOCAL_MEM_FENCE); 607 if (get_global_id(0)==0) 608 *result = 0.5*buffer[0]; 613 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source); 615 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D); 616 kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D); 626 viennacl::ocl::kernel& kernel = generate_kernel<T>();
// One-element GPU vector that the kernel writes its scalar result into.
628 CGPUVector<T> result(1);
// Kernel arguments, in order: P's buffer, element count (num_rows*num_cols),
// P's offset, Q's buffer, Q's offset, the result buffer. How the value is
// read back and returned is not shown in this excerpt.
630 viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
631 cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
632 Q.vcl_matrix(), cl_int(Q.offset),
633 result.vcl_vector()));
638 #endif // HAVE_VIENNACL 647 #endif // SPECIAL_PURPOSE_IMPL_H_ Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
static void compute(SGMatrix< T > A, SGMatrix< T > result)
static void compute(SGMatrix< T > A)
static void compute(SGMatrix< T > A, SGMatrix< T > result)
Backend
All currently supported linear algebra backend libraries, with a default backend, which will be used ...
static void compute(SGMatrix< T > A, SGMatrix< T > C)
static T compute(SGMatrix< T > P, SGMatrix< T > Q)
static T compute(SGMatrix< T > P, SGMatrix< T > Q)
static void compute(Matrix A, Matrix result)
static void compute(SGMatrix< T > A, SGMatrix< T > C)
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
all of classes and functions are contained in the shogun namespace
T sum(const Container< T > &a, bool no_diag=false)
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
static float64_t exp(float64_t x)
static float64_t log(float64_t v)
T max(const Container< T > &a)