I created a kernel in OpenCL and a host program in C++ using the OpenCL library. Unfortunately, despite long hours spent trying to solve the problem, I get the following error: OpenCL error: clCreateKernel (-45). Why? I tested a sample project to check that it is not a hardware problem: https://gist.github.com/jarutis/a64eaa38c1caaf7bc3d28cea64bb8359 It works perfectly -- I only needed to change one thing in the code.
// OpenCL C source of the greedy change-making kernel `parallelGreedy2`.
//
// Kernel arguments (unchanged): the denominators array, the number of
// denominators, the amount to exchange, and the result text buffer.
//
// Changes compared with the previous version:
//  * snprintf is not an OpenCL C built-in, so the program could not be built;
//    numbers are now converted to text by a small helper.
//  * Every work-item used to sort the shared `denominators` array in place and
//    to write its text starting at result[0], which is a data race. The
//    denominators are now only read (visited in descending order without
//    being modified), and each work-item writes into its own
//    RESULT_STRIDE-byte slice starting at get_global_id(0) * RESULT_STRIDE.
//  * Writes are bounded to the slice, and each slice is NUL-terminated.
//  * Denominators that are zero or negative are ignored (they previously
//    caused a division by zero or a nonsensical count).
//
// The host must allocate at least RESULT_STRIDE (1000) bytes of `result` per
// work-item.
const char* kernel2Greedy = R"(
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

// Bytes of the result buffer owned by each work-item (including the NUL).
#define RESULT_STRIDE 1000

__constant char GREEDY_CASH_LABEL[] = " cash x";

// Appends one character to the slice unless it is full; returns the new length.
int greedyAppendChar(__global char* out, int pos, char c) {
    if (pos < RESULT_STRIDE - 1) {
        out[pos++] = c;
    }
    return pos;
}

// Appends the decimal representation of `value`; returns the new length.
int greedyAppendNumber(__global char* out, int pos, ulong value) {
    char digits[20];  // a 64-bit unsigned value has at most 20 decimal digits
    int count = 0;
    do {
        digits[count++] = (char)('0' + (int)(value % 10));
        value /= 10;
    } while (value != 0);
    while (count > 0) {
        pos = greedyAppendChar(out, pos, digits[--count]);
    }
    return pos;
}

__kernel void parallelGreedy2(__global long* denominators, int numDenominators, double numberToExchange, __global char* result) {
    __global char* out = result + get_global_id(0) * RESULT_STRIDE;
    int pos = 0;
    out[0] = '\0';

    // Visit the denominators from largest to smallest without reordering the
    // shared array: each step picks the largest entry that comes after the
    // previously visited one in (value descending, index ascending) order.
    long prevValue = LONG_MAX;
    int prevIndex = -1;
    for (int step = 0; step < numDenominators && numberToExchange > 0; step++) {
        int best = -1;
        for (int j = 0; j < numDenominators; j++) {
            long candidate = denominators[j];
            bool notYetVisited = (candidate < prevValue) || (candidate == prevValue && j > prevIndex);
            if (notYetVisited && (best < 0 || candidate > denominators[best])) {
                best = j;
            }
        }
        if (best < 0) {
            break;
        }
        prevValue = denominators[best];
        prevIndex = best;

        // Everything still unvisited is <= prevValue, so nothing usable is left.
        if (prevValue <= 0) {
            break;
        }

        if (numberToExchange >= prevValue) {
            int biggestNumberToExchangeInLoop = (int)(numberToExchange / prevValue);

            // Emits "<denominator> cash x<count>" followed by a newline.
            pos = greedyAppendNumber(out, pos, (ulong)prevValue);
            for (int k = 0; GREEDY_CASH_LABEL[k] != '\0'; k++) {
                pos = greedyAppendChar(out, pos, GREEDY_CASH_LABEL[k]);
            }
            pos = greedyAppendNumber(out, pos, (ulong)biggestNumberToExchangeInLoop);
            pos = greedyAppendChar(out, pos, '\n');
            out[pos] = '\0';

            // Keep two decimal places to limit floating-point drift.
            numberToExchange = round(100 * (numberToExchange - (biggestNumberToExchangeInLoop * prevValue))) / 100.0;
        }
    }
}
)";
int main() {
std::cout << "Type a number of instances you want to create:" << std::endl;
int numberOfInstances2;
std::cin >> numberOfInstances2;
std::vector<long> denominators;
std::cout << "How many numbers you would like to add?" << std::endl;
int numberToAdd;
std::cin >> numberToAdd;
for (int i = 0; i < numberToAdd; i++) {
std::cout << "Add next number:" << std::endl;
long nextNumber;
std::cin >> nextNumber;
denominators.push_back(nextNumber);
}
// Setup OpenCL
try {
// device containers
cl::Buffer bufferDenominators;
cl::Buffer bufferResult;
cl::Buffer bufferNumberToExchange;
cl::Buffer bufferNumberOfDenominators;
// create a context
cl::Context context(CL_DEVICE_TYPE_GPU);
// create a program
cl::Program program(context, kernel2Greedy);
// get the command queue
cl::CommandQueue queue(context);
// create the kernel
cl::Kernel kernel(program, "parallelGreedy2");
// copy the data to the device
bufferDenominators = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(long) * denominators.size(), denominators.data());
bufferResult = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(char) * 1000 * numberOfInstances2);
bufferNumberToExchange = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(double), &numberToAdd);
bufferNumberOfDenominators = cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &numberToAdd);
// set kernel arguments
kernel.setArg(0, bufferDenominators);
kernel.setArg(1, bufferNumberOfDenominators);
kernel.setArg(2, bufferNumberToExchange);
kernel.setArg(3, bufferResult);
// run the kernel
queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(numberOfInstances2), cl::NullRange);
queue.finish();
// copy the data back
std::vector<char> result(numberOfInstances2 * 1000);
queue.enqueueReadBuffer(bufferResult, CL_TRUE, 0, sizeof(char) * 1000 * numberOfInstances2, result.data());
// Print results
for (int i = 0; i < numberOfInstances2; ++i) {
std::cout << "Result for instance " << i << ":" << std::endl;
std::cout << std::string(result.begin() + (i * 1000), result.begin() + ((i + 1) * 1000)) << std::endl;
}
} catch (cl::Error& e) {
std::cerr << "OpenCL error: " << e.what() << " (" << e.err() << ")" << std::endl;
return 1;
}
return 0;
}
I still get OpenCL error: clCreateKernel (-45).
The algorithm itself works properly (checked).
What am I missing?
Error -45 (CL_INVALID_PROGRAM_EXECUTABLE)
indicates that the OpenCL C code has not been compiled to executable code. Indeed, the call program.build({ device }, "");
is missing, along with the code to select a device (GPU):
// Gather every device exposed by every installed OpenCL platform (driver).
vector<cl::Platform> cl_platforms;
cl::Platform::get(&cl_platforms);

vector<cl::Device> cl_devices;
for(cl::Platform& platform : cl_platforms) {
	// Devices of all types (CPU, GPU, accelerator, ...) offered by this platform.
	vector<cl::Device> cl_devices_available;
	platform.getDevices(CL_DEVICE_TYPE_ALL, &cl_devices_available);
	cl_devices.insert(cl_devices.end(), cl_devices_available.begin(), cl_devices_available.end());
}

cl::Device device = cl_devices[0]; // select device here
Also, cl::Context context(device);
has to be initialized on this specific device you selected, not on the CL_DEVICE_TYPE_GPU
device type.
Do yourself a favor and use this lightweight OpenCL-Wrapper: it eliminates all of this code overhead and makes developing with OpenCL a lot simpler.