#define CL_TARGET_OPENCL_VERSION 110 #ifdef __APPLE__ #include #else #include #endif #include #include #include #include std::string loadKernel(const char *name) { std::fstream fin(name); std::string result( (std::istreambuf_iterator(fin)), std::istreambuf_iterator() ); return result; } cl_program createProgram(const std::string &source, cl_context context) { size_t lengths[1] = { source.size() }; const char *sources[1] = { source.data() }; cl_int error = 0; cl_program program = clCreateProgramWithSource(context, 1, sources, lengths, &error); return program; } int main() { // Geting information about a platform and devices cl_uint platformIdCount = 0; clGetPlatformIDs(0, nullptr, &platformIdCount); std::cout << "Number of platforms: " << platformIdCount << std::endl; std::vector platformIds(platformIdCount); clGetPlatformIDs(platformIdCount, platformIds.data(), nullptr); for (auto it = platformIds.begin(); it != platformIds.end(); it++) { std::cout << *it << std::endl; } std::cout << std::endl; cl_uint deviceIdCount = 0; clGetDeviceIDs(platformIds[0], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceIdCount); std::cout << "Number of devices: " << deviceIdCount << std::endl; std::vector deviceIds(deviceIdCount); clGetDeviceIDs(platformIds[0], CL_DEVICE_TYPE_ALL, deviceIdCount, deviceIds.data(), nullptr); for (auto it = deviceIds.begin(); it != deviceIds.end(); it++) { std::cout << *it << std::endl; } std::cout << std::endl; // creating the context cl_int error; const cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, reinterpret_cast(platformIds[0]), 0, 0 }; cl_context context = clCreateContext( contextProperties, deviceIdCount, deviceIds.data(), nullptr, nullptr, &error ); // building program cl_program program = createProgram(loadKernel("kernel_saxpy.cl"), context); clBuildProgram(program, deviceIdCount, deviceIds.data(), nullptr, nullptr, nullptr); // creating kernel cl_kernel kernel = clCreateKernel(program, "saxpy", &error); // creating buffers static const size_t testDataSize = 10; // 2^10 std::vector a(testDataSize), b(testDataSize); int i = 1; for (auto it = a.begin(); it != a.end(); it++) { *it = i; i++; } cl_mem aBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * testDataSize, a.data(), &error ); cl_mem bBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * testDataSize, b.data(), &error ); // creating command-queue cl_command_queue queue = clCreateCommandQueue(context, deviceIds[0], 0, &error); // we're ready to finally run code clSetKernelArg(kernel, 0, sizeof(cl_mem), &aBuffer); clSetKernelArg(kernel, 1, sizeof(cl_mem), &bBuffer); static const float num = 2.0f; clSetKernelArg(kernel, 2, sizeof(float), &num); const size_t globalWorkSize[] = { testDataSize, 0, 0 }; clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr); clEnqueueReadBuffer(queue, bBuffer, CL_TRUE, 0, sizeof(float) * testDataSize, b.data(), 0, nullptr, nullptr); for (auto it = b.begin(); it != b.end(); it++) { std::cout << *it << " "; } // release used resources clReleaseCommandQueue(queue); clReleaseMemObject(bBuffer); clReleaseMemObject(aBuffer); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseContext(context); }