Analysis Software
Documentation for sPHENIX simulation software
opencl_backend.cc
Go to the documentation of this file, or view the newest version of opencl_backend.cc in the sPHENIX GitHub repository.
/*******************************************************************************
 * Copyright (c) 2018-2019 LongGang Pang, lgpang@qq.com
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
 * "Materials"), to deal in the Materials without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Materials, and to
 * permit persons to whom the Materials are furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

#include "include/opencl_backend.h"

namespace clvisc {

CompileOption::CompileOption() {
    Define("USE_SINGLE_PRECISION");
};

CompileOption::CompileOption(bool use_single_precision, bool optimize) {
    if (use_single_precision) {
        Define("USE_SINGLE_PRECISION");
    }
    if (!optimize) {
        Define("-cl-opt-disable");
    }
}

void CompileOption::Define(std::string definition) {
    opt << "-D " << definition << " ";
}

void CompileOption::KernelIncludePath(std::string abs_path) {
    opt << "-I " << abs_path << " ";
}

void CompileOption::SetIntConst(std::string key, int value) {
    opt << "-D " << key << "=" << value << " ";
}

// for float values, use "#define key 0.33f" if value == 0.33.
void CompileOption::SetFloatConst(std::string key, float value) {
    opt << "-D " << key << "=" << std::setprecision(6) << std::fixed << value << "f ";
}

// for double values, use "#define key 0.33" if value == 0.33.
void CompileOption::SetDoubleConst(std::string key, double value) {
    opt << "-D " << key << "=" << value << " ";
}

std::string CompileOption::str() {
    return opt.str();
}

OpenclBackend::OpenclBackend(std::string device_type, int device_id) {
    // select device type and device id (if there are multiple cpus/gpus)
    if (device_type == "cpu" || device_type == "CPU") {
        device_type_ = CL_DEVICE_TYPE_CPU;
    } else if (device_type == "gpu" || device_type == "GPU") {
        device_type_ = CL_DEVICE_TYPE_GPU;
    } else {
        device_type_ = CL_DEVICE_TYPE_ALL;
    };
    device_id_ = device_id;
    // create a context for the designated device type
    context_ = CreateContext_(device_type_);
    // choose one device if there are many of the same kind
    devices_ = context_.getInfo<CL_CONTEXT_DEVICES>();
    auto num_of_devices = devices_.size();
    if (device_id_ < 0 || device_id_ > num_of_devices - 1) {
        DeviceInfo();
        throw std::out_of_range("device_id out of range");
    } else {
        device_ = devices_.at(device_id_);
    }
    queue_ = cl::CommandQueue(context_, device_, CL_QUEUE_PROFILING_ENABLE);
}


// return the kernel execution time of one event in units of seconds
float OpenclBackend::ExcutionTime(cl::Event & event)
{
    cl_ulong tstart, tend;
    event.getProfilingInfo(CL_PROFILING_COMMAND_START, &tstart);
    event.getProfilingInfo(CL_PROFILING_COMMAND_END, &tend);
    //std::cout<<"#run time="<<(tend - tstart)/1000<<"ms\n";
    return (tend - tstart) * 1.0E-9;
}

cl::Context OpenclBackend::CreateContext_(const cl_int & device_type)
{
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.size() == 0) {
        std::cerr << "No platform found, install CUDA or AMD SDK first\n";
        exit(-1);
    } else {
        for (int i = 0; i < platforms.size(); i++) {
            std::vector<cl::Device> supportDevices;
            platforms.at(i).getDevices(CL_DEVICE_TYPE_ALL, &supportDevices);
            for (int j = 0; j < supportDevices.size(); j++) {
                if (supportDevices.at(j).getInfo<CL_DEVICE_TYPE>() == device_type) {
                    //std::cout<<"#Found device "<<device_type<<" on platform "<<i<<std::endl;
                    cl_context_properties properties[] =
                        { CL_CONTEXT_PLATFORM,
                          (cl_context_properties) (platforms.at(i))(),
                          0 };
                    return cl::Context(device_type, properties);
                }  // Found a supported device and platform
            }  // End for devices
        }  // End for platforms
        std::cerr << "no platform supports device type " << device_type << std::endl;
        exit(-1);
    }
}

cl::Program OpenclBackend::BuildProgram(std::string fname,
                                        const std::string & compile_option)
{
    std::ifstream kernelFile(fname.c_str());
    if (!kernelFile.is_open()) {
        throw std::runtime_error("Fail to open kernel file: " + fname);
    }
    std::string sprog(std::istreambuf_iterator<char>(kernelFile),
                      (std::istreambuf_iterator<char>()));
    cl::Program::Sources prog(1, std::make_pair(sprog.c_str(), sprog.length()));
    auto program = cl::Program(context_, prog);
    //programs.push(program);
    try {
        program.build(devices_, compile_option.c_str());
        kernelFile.close();
    } catch (cl::Error & err) {
        std::cerr << err.what() << "(" << err.err() << ")\n"
                  << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device_);
    }

    return program;
}

cl::Buffer OpenclBackend::CreateBuffer(size_t bytes_of_buffer) {
    return cl::Buffer(context_, CL_MEM_READ_WRITE, bytes_of_buffer);
}

template <typename ValueType>
cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<ValueType> & source_vector,
                                                   bool read_only) {
    // copy content from a source vector to global memory of device
    if (read_only) {
        return cl::Buffer(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                          source_vector.size()*sizeof(ValueType), source_vector.data());
    } else {
        return cl::Buffer(context_, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                          source_vector.size()*sizeof(ValueType), source_vector.data());
    }
}

cl::Image2D OpenclBackend::CreateImage2DByCopyVector(std::vector<cl_float4> & source_vector,
                                                     size_t width, size_t height, bool read_only) {
    // copy content from a source vector to global memory of device
    cl::ImageFormat img_fmt;
    img_fmt.image_channel_order = CL_RGBA;
    img_fmt.image_channel_data_type = CL_FLOAT;
    size_t row_pitch = 0;
    if (read_only) {
        return cl::Image2D(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                           img_fmt, width, height, row_pitch, source_vector.data());
    } else {
        return cl::Image2D(context_, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                           img_fmt, width, height, row_pitch, source_vector.data());
    }
}

void OpenclBackend::enqueue_run(const cl::Kernel & kernel_,
                                const cl::NDRange & global_size,
                                const cl::NDRange & local_size) {
    cl::Event event;
    queue_.enqueueNDRangeKernel(
            kernel_,         // kernel
            cl::NullRange,   // offset
            global_size,     // global size
            local_size,      // local size (automatically set by the system)
            NULL,            // event waiting list
            &event);         // event for profiling
    event.wait();
}


// from std::vector to cl::Buffer
template <typename ValueType>
void OpenclBackend::enqueue_copy(const std::vector<ValueType> & source_vector,
                                 cl::Buffer & dst_buffer)
{
    cl::Event event;
    queue_.enqueueWriteBuffer(
            dst_buffer,                              // dst buffer
            CL_TRUE,                                 // blocking write
            0,                                       // offset
            source_vector.size()*sizeof(ValueType),  // size
            source_vector.data(),                    // source vector
            NULL,
            &event);
    event.wait();
}

// from cl::Buffer to std::vector
template <typename ValueType>
void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer,
                                 std::vector<ValueType> & dst_vector)
{
    cl::Event event;
    queue_.enqueueReadBuffer(
            source_buffer,                        // source buffer
            CL_TRUE,                              // blocking read
            0,                                    // offset
            dst_vector.size()*sizeof(ValueType),  // size
            dst_vector.data(),                    // dst vector
            NULL,
            &event);
    event.wait();
}

// from cl::Buffer to cl::Buffer
void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer,
                                 cl::Buffer & dst_buffer,
                                 size_t size_in_bytes)
{
    cl::Event event;
    queue_.enqueueCopyBuffer(
            source_buffer,    // source buffer
            dst_buffer,       // dst buffer
            0,                // src offset
            0,                // dst offset
            size_in_bytes,    // size
            NULL,             // waiting event-list
            &event);          // event
    event.wait();
}


void OpenclBackend::DeviceInfo() {
    int device_id = 0;
    for (auto device : devices_) {
        std::cout << "Device ID: " << device_id << std::endl;
        std::cout << "Device Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
        std::cout << "Max computing units: " << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
        std::cout << std::endl;
        std::cout << "Max workgroup size: " << device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
        std::cout << std::endl;
        std::cout << "Max work items in one work group: ";
        for (auto sz : device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()) {
            std::cout << sz << " ";
        }
        std::cout << std::endl;
        std::cout << "Global memory size: " << device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>()/1024/1024/1024 << "GB";
        std::cout << std::endl;
        std::cout << "Local memory size: " << device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>()/1024 << "KB";

        std::cout << std::endl << std::endl;
        device_id++;
    }
}

// return the OpenCL device type (CPU, GPU or ALL) chosen at construction
cl_int OpenclBackend::DeviceType() {
    return device_type_;
}

// template member functions need explicit instantiation on macOS
template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_int> & source_vector, bool read_only);

template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real> & source_vector, bool read_only);

template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real4> & source_vector, bool read_only);

// cl_real3 is the same data type as cl_real4 in cl.hpp, so it cannot be re-declared here
//template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real3> & source_vector, bool read_only);

template cl::Buffer OpenclBackend::CreateBufferByCopyVector(std::vector<cl_real8> & source_vector, bool read_only);

template void OpenclBackend::enqueue_copy(const std::vector<cl_int> & source_vector, cl::Buffer & dst_buffer);

template void OpenclBackend::enqueue_copy(const std::vector<cl_real> & source_vector, cl::Buffer & dst_buffer);

template void OpenclBackend::enqueue_copy(const std::vector<cl_real4> & source_vector, cl::Buffer & dst_buffer);

template void OpenclBackend::enqueue_copy(const std::vector<cl_real8> & source_vector, cl::Buffer & dst_buffer);

template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_int> & dst_vector);

template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real> & dst_vector);

template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real4> & dst_vector);

template void OpenclBackend::enqueue_copy(const cl::Buffer & source_buffer, std::vector<cl_real8> & dst_vector);

} // end namespace clvisc
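
The file above only defines the backend; the following is a minimal usage sketch, not part of the sPHENIX or clvisc sources, showing how the pieces fit together. It uses only members defined in this file plus the standard cl.hpp host API; the kernel file name "kernels/vec_add.cl", the kernel name "vec_add", and the work sizes are hypothetical placeholders. Note that opts.str() simply returns the accumulated option string, e.g. "-D USE_SINGLE_PRECISION -D NX=1024 ", which is passed to the OpenCL compiler when the program is built.

    #include "include/opencl_backend.h"
    #include <vector>

    int main() {
        using namespace clvisc;

        // collect "-D ..." compile options (single precision, optimizations on)
        CompileOption opts(true, true);
        opts.SetIntConst("NX", 1024);

        // pick the first GPU; DeviceInfo() lists what is available
        OpenclBackend backend("gpu", 0);
        backend.DeviceInfo();

        // build the kernel program with the accumulated options
        // (file and kernel names are hypothetical)
        cl::Program prog = backend.BuildProgram("kernels/vec_add.cl", opts.str());
        cl::Kernel kernel(prog, "vec_add");

        // upload the inputs and allocate an output buffer of the same size
        std::vector<cl_int> a(1024, 1), b(1024, 2), c(1024, 0);
        cl::Buffer buf_a = backend.CreateBufferByCopyVector(a, true);
        cl::Buffer buf_b = backend.CreateBufferByCopyVector(b, true);
        cl::Buffer buf_c = backend.CreateBuffer(c.size() * sizeof(cl_int));
        kernel.setArg(0, buf_a);
        kernel.setArg(1, buf_b);
        kernel.setArg(2, buf_c);

        // launch 1024 work items in groups of 256, then read the result back
        backend.enqueue_run(kernel, cl::NDRange(1024), cl::NDRange(256));
        backend.enqueue_copy(buf_c, c);
        return 0;
    }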