SHOGUN  6.0.0
LinalgBackendViennaclKernels.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016, Shogun-Toolbox e.V. <shogun-team@shogun-toolbox.org>
3  * All rights reserved.
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * 1. Redistributions of source code must retain the above copyright notice,
8  * this list of conditions and the following disclaimer.
9  *
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  *
14  * 3. Neither the name of the copyright holder nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * Authors: 2016 Pan Deng, Soumyajit De, Heiko Strathmann, Viktor Gal
31  */
32 
33 #ifndef LINALG_BACKEND_VIENNACL_KERNELS_H__
34 #define LINALG_BACKEND_VIENNACL_KERNELS_H__
35 
36 #include <shogun/lib/common.h>
37 
38 #ifdef HAVE_VIENNACL
40 #include <memory>
41 
42 namespace shogun
43 {
47  template <typename T>
48  static viennacl::ocl::kernel& generate_max_kernel()
49  {
50  std::string kernel_name = "max_" + linalg::implementation::ocl::get_type_string<T>();
51 
52  if (linalg::implementation::ocl::kernel_exists(kernel_name))
53  return linalg::implementation::ocl::get_kernel(kernel_name);
54 
55  std::string source = linalg::implementation::ocl::generate_kernel_preamble<T>(kernel_name);
56 
57  source.append(
58  R"(
59  __kernel void KERNEL_NAME(
60  __global DATATYPE* vec, int size, int offset,
61  __global DATATYPE* result)
62  {
63  __local DATATYPE buffer[WORK_GROUP_SIZE_1D];
64  int local_id = get_local_id(0);
65  DATATYPE thread_max = -INFINITY;
66  for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
67  {
68  DATATYPE v = vec[i+offset];
69  thread_max = max(v, thread_max);
70  }
71  buffer[local_id] = thread_max;
72  for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
73  {
74  barrier(CLK_LOCAL_MEM_FENCE);
75  if (local_id < j)
76  buffer[local_id] = max(buffer[local_id], buffer[local_id + j]);
77  }
78  barrier(CLK_LOCAL_MEM_FENCE);
79  if (get_global_id(0)==0)
80  *result = buffer[0];
81  }
82  )"
83  );
84 
85  viennacl::ocl::kernel& kernel = linalg::implementation::ocl::compile_kernel(kernel_name, source);
86 
87  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
88  kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);
89 
90  return kernel;
91  }
92 
98  template <class T>
99  static viennacl::ocl::kernel& generate_sum_kernel(bool no_diag)
100  {
101  std::string kernel_name = "sum_" + linalg::implementation::ocl::get_type_string<T>();
102  if (no_diag) kernel_name.append("_no_diag");
103 
104  if (linalg::implementation::ocl::kernel_exists(kernel_name))
105  return linalg::implementation::ocl::get_kernel(kernel_name);
106 
107  std::string source = linalg::implementation::ocl::generate_kernel_preamble<T>(kernel_name);
108  if (no_diag) source.append("#define NO_DIAG\n");
109 
110  source.append(
111  R"(
112  __kernel void KERNEL_NAME(
113  __global DATATYPE* mat, int nrows, int ncols, int offset,
114  __global DATATYPE* result)
115  {
116  __local DATATYPE buffer[WORK_GROUP_SIZE_1D];
117  int size = nrows*ncols;
118 
119  int local_id = get_local_id(0);
120 
121  DATATYPE thread_sum = 0;
122  for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D)
123  {
124  #ifdef NO_DIAG
125  if (!(i/nrows == i%nrows))
126  #endif
127  thread_sum += mat[i+offset];
128  }
129 
130  buffer[local_id] = thread_sum;
131 
132  for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1)
133  {
134  barrier(CLK_LOCAL_MEM_FENCE);
135  if (local_id < j)
136  buffer[local_id] += buffer[local_id + j];
137  }
138 
139  barrier(CLK_LOCAL_MEM_FENCE);
140 
141  if (get_global_id(0)==0)
142  *result = buffer[0];
143  }
144  )"
145  );
146 
147  viennacl::ocl::kernel& kernel =
148  linalg::implementation::ocl::compile_kernel(kernel_name, source);
149 
150  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
151  kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D);
152 
153  return kernel;
154  }
155 
161  template <class T>
162  static viennacl::ocl::kernel& generate_colwise_sum_kernel(bool no_diag)
163  {
164  std::string kernel_name = "colwise_sum_" + linalg::implementation::ocl::get_type_string<T>();
165  if (no_diag) kernel_name.append("_no_diag");
166 
167  if (linalg::implementation::ocl::kernel_exists(kernel_name))
168  return linalg::implementation::ocl::get_kernel(kernel_name);
169 
170  std::string source = linalg::implementation::ocl::generate_kernel_preamble<T>(kernel_name);
171  if (no_diag) source.append("#define NO_DIAG\n");
172 
173  source.append(
174  R"(
175  __kernel void KERNEL_NAME(
176  __global DATATYPE* mat, int nrows, int ncols, int offset,
177  __global DATATYPE* result, int result_offset)
178  {
179  int j = get_global_id(0);
180 
181  if (j>=ncols)
182  return;
183 
184  DATATYPE sum = 0;
185  for (int i=0; i<nrows; i++)
186  {
187  #ifdef NO_DIAG
188  if (i!=j)
189  #endif
190  sum += mat[offset+i+j*nrows];
191  }
192 
193  result[j+result_offset] = sum;
194  }
195  )"
196  );
197 
198  viennacl::ocl::kernel& kernel =
199  linalg::implementation::ocl::compile_kernel(kernel_name, source);
200 
201  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
202 
203  return kernel;
204  }
205 
211  template <class T>
212  static viennacl::ocl::kernel& generate_rowwise_sum_kernel(bool no_diag)
213  {
214  std::string kernel_name = "rowwise_sum_" + linalg::implementation::ocl::get_type_string<T>();
215  if (no_diag) kernel_name.append("_no_diag");
216 
217  if (linalg::implementation::ocl::kernel_exists(kernel_name))
218  return linalg::implementation::ocl::get_kernel(kernel_name);
219 
220  std::string source = linalg::implementation::ocl::generate_kernel_preamble<T>(kernel_name);
221  if (no_diag) source.append("#define NO_DIAG\n");
222 
223  source.append(
224  R"(
225  __kernel void KERNEL_NAME(
226  __global DATATYPE* mat, int nrows, int ncols, int offset,
227  __global DATATYPE* result, int result_offset)
228  {
229  int i = get_global_id(0);
230 
231  if (i>=nrows)
232  return;
233 
234  DATATYPE sum = 0;
235  for (int j=0; j<ncols; j++)
236  {
237  #ifdef NO_DIAG
238  if (i!=j)
239  #endif
240  sum += mat[offset+i+j*nrows];
241  }
242 
243  result[i+result_offset] = sum;
244  }
245  )"
246  );
247 
248  viennacl::ocl::kernel& kernel = linalg::implementation::ocl::compile_kernel(kernel_name, source);
249 
250  kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D);
251 
252  return kernel;
253  }
254 
255 }
256 #endif // HAVE_VIENNACL
257 
258 #endif // LINALG_BACKEND_VIENNACL_KERNELS_H__
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18

SHOGUN Machine Learning Toolbox - Documentation