Define("USE_SINGLE_PRECISION");

if (use_single_precision) {
  Define("USE_SINGLE_PRECISION");
}
opt << "-D " << definition << " ";

opt << "-I " << abs_path << " ";

opt << "-D " << key << "=" << value << " ";

opt << "-D " << key << "=" << std::setprecision(6) << std::fixed << value << "f ";

opt << "-D " << key << "=" << value << " ";
if (device_type == "cpu" || device_type == "CPU") {
  // ...
} else if (device_type == "gpu" || device_type == "GPU") {
auto num_of_devices = static_cast<int>(devices_.size());
if (device_id_ < 0 || device_id_ >= num_of_devices) {
  throw std::out_of_range("device_id out of range");
}
cl_ulong tstart, tend;
event.getProfilingInfo(CL_PROFILING_COMMAND_START, &tstart);
event.getProfilingInfo(CL_PROFILING_COMMAND_END, &tend);

// Profiling counters are reported in nanoseconds; convert to seconds.
return (tend - tstart) * 1.0E-9;
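// Usage sketch (an assumption about the surrounding wrapper, not code from the
// source): profiling data is only valid when the command queue was created
// with CL_QUEUE_PROFILING_ENABLE and the event has completed, e.g.
//   cl::CommandQueue queue(context_, device_, CL_QUEUE_PROFILING_ENABLE);
//   cl::Event event;
//   queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local, nullptr, &event);
//   event.wait();
// after which CL_PROFILING_COMMAND_START/END can be queried as above.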
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
if (platforms.empty()) {
  std::cerr << "No OpenCL platform found; install CUDA or the AMD APP SDK first\n";
  // ...
}
for (size_t i = 0; i < platforms.size(); i++) {
  std::vector<cl::Device> supportDevices;
  platforms.at(i).getDevices(CL_DEVICE_TYPE_ALL, &supportDevices);
  for (size_t j = 0; j < supportDevices.size(); j++) {
    if (supportDevices.at(j).getInfo<CL_DEVICE_TYPE>() == device_type) {
      cl_context_properties properties[] =
          { CL_CONTEXT_PLATFORM,
            (cl_context_properties)(platforms.at(i))(),
            0 };
      // ...
    }
  }
}

std::cerr << "No platform supports device type " << device_type << std::endl;
std::ifstream kernelFile(fname.c_str());
if (!kernelFile.is_open()) {
  throw std::runtime_error("Failed to open kernel file: " + fname);
}
std::string sprog(std::istreambuf_iterator<char>(kernelFile),
                  (std::istreambuf_iterator<char>()));
try {
  program.build(devices_, compile_option.c_str());
} catch (cl::Error &err) {
  std::cerr << err.what() << "(" << err.err() << ")\n"
            << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device_);
}
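// How the program object is typically created from the kernel source string
// read above (a sketch against the OpenCL 1.x C++ bindings in cl.hpp;
// context_ is an assumed member name):
//   cl::Program::Sources sources;
//   sources.push_back({sprog.c_str(), sprog.length()});
//   cl::Program program(context_, sources);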
template <typename ValueType>
// ...
      source_vector.size() * sizeof(ValueType), source_vector.data());
// ...
      source_vector.size() * sizeof(ValueType), source_vector.data());
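// A complete buffer creation of this form might read (a sketch; the flags and
// the context_ member are assumptions, not taken from the source):
//   cl::Buffer buffer(context_, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
//                     source_vector.size() * sizeof(ValueType),
//                     source_vector.data());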
                   size_t width, size_t height, bool read_only) {
  cl::ImageFormat img_fmt;
  img_fmt.image_channel_order = CL_RGBA;
  img_fmt.image_channel_data_type = CL_FLOAT;
  size_t row_pitch = 0;
  // ...
      img_fmt, width, height, row_pitch, source_vector.data());
  // ...
      img_fmt, width, height, row_pitch, source_vector.data());
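// The trailing arguments match the cl::Image2D constructor; a complete call
// might read (a sketch; the flags and context_ are assumptions tied to the
// read_only parameter, not taken from the source):
//   cl::Image2D image(context_,
//                     (read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE)
//                         | CL_MEM_COPY_HOST_PTR,
//                     img_fmt, width, height, row_pitch, source_vector.data());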
template <typename ValueType>
// ...
      source_vector.size() * sizeof(ValueType),
      source_vector.data(),
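// These arguments fit cl::CommandQueue::enqueueWriteBuffer; a complete call
// might read (a sketch; queue_ and buffer are assumed names):
//   queue_.enqueueWriteBuffer(buffer, CL_TRUE, 0,
//                             source_vector.size() * sizeof(ValueType),
//                             source_vector.data());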
template <typename ValueType>
// ...
                       std::vector<ValueType> &dst_vector)
// ...
      dst_vector.size() * sizeof(ValueType),
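// Reading back fits cl::CommandQueue::enqueueReadBuffer; a complete call might
// read (a sketch; queue_ and buffer are assumed names):
//   queue_.enqueueReadBuffer(buffer, CL_TRUE, 0,
//                            dst_vector.size() * sizeof(ValueType),
//                            dst_vector.data());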
// ...
                       size_t size_in_bytes)
std::cout << "Device ID: " << device_id << std::endl;
std::cout << "Device Name: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::cout << "Max compute units: "
          << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << std::endl;
std::cout << "Max workgroup size: "
          << device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << std::endl;
std::cout << "Max work items in one work group: ";
for (auto sz : device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()) {
  std::cout << sz << " ";
}
std::cout << std::endl;
std::cout << "Global memory size: "
          << device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() / 1024 / 1024 / 1024
          << " GB" << std::endl;
std::cout << "Local memory size: "
          << device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>() / 1024
          << " KB" << std::endl;
std::cout << std::endl;