Post-solution edit: The issue is with the code alone. There is no hardware issue here. Now to the original post:
I'm trying to get a basic OpenCL program to work.
The program simply creates a buffer, runs a kernel that writes 42 to the buffer, reads the buffer back, and prints the result.
Here's the code, written in C:
#define MY_PLATFORM 2 // Adjustable: zero-based index into the platform list filled in by clGetPlatformIDs.
#define MY_DEVICE 0 // Adjustable: zero-based index into the device list filled in by clGetDeviceIDs for that platform.
#include <stdio.h>
#include <stdlib.h>
#define CL_TARGET_OPENCL_VERSION 200
#include <CL/cl.h>
/*
 * OpenCL C source of the `fourtyTwo` kernel: every work-item stores 42 into
 * the slot of `output` selected by its global id in dimension 0.
 * Written as adjacent string literals, one per kernel source line; the
 * resulting bytes (including the explicit trailing NUL) are unchanged.
 */
const char *myCode =
    " "
    "__kernel void fourtyTwo(__global int *output) { \n "
    "int i = get_global_id(0); \n "
    "output[i] = 42; \n "
    "} \n "
    "\0";
int main(void) {
cl_platform_id *myPlatforms = (cl_platform_id*)malloc(sizeof(cl_platform_id));
cl_uint *myPlatformCount = (cl_uint*)malloc(sizeof(cl_uint));
cl_device_id *myDevices = (cl_device_id*)malloc(sizeof(cl_device_id));
cl_uint *myDeviceCount = (cl_uint*)malloc(sizeof(cl_uint));
int err;
/* Reference:
cl_int clGetPlatformIDs(
cl_uint num_entries, // I want just enough to reach MY_PLATFORM.
cl_platform_id *platforms, // myPlatforms
cl_uint *num_platforms // myPlatformCount
)
*/
err = clGetPlatformIDs(MY_PLATFORM + 1, myPlatforms, myPlatformCount);
if(err != 0) fprintf(stderr, "\nCould not query platforms.\nOpenCL failed with exit code %d\n\n", err);
/* Reference:
cl_int clGetDeviceIDs(
cl_platform_id num_entries, // I want the MY_PLATFORM'th entry of myPlatforms.
cl_device_type device_type, // Any device will do.
cl_uint num_entries, // I want just enough to reach MY_DEVICE.
cl_device_id *devices, // myDevices
cl_uint *num_devices // myDeviceCount
)
*/
err = clGetDeviceIDs(myPlatforms[MY_PLATFORM], CL_DEVICE_TYPE_ALL, MY_DEVICE + 1, myDevices, myDeviceCount);
if(err != 0) fprintf(stderr, "\nCould not query devices.\nOpenCL failed with exit code %d\n\n", err);
/* Reference:
cl_context clCreateContext(
cl_context_properties *properties, // The default properties should suffice.
cl_uint num_devices, // I want just enough to reach MY_DEVICE.
const cl_device_id *devices, // myDevices
void *pfn_notify, // No need for this.
void *user_data, // No need for this.
cl_int *errcode_ret // err
)
*/
cl_context myContext = clCreateContext(NULL, MY_DEVICE + 1, myDevices, NULL, NULL, &err);
if(err != 0) fprintf(stderr, "\nCould not open context.\nOpenCL failed with exit code %d\n\n", err);
/* Reference:
cl_command_queue clCreateCommandQueueWithProperties(
cl_context context, // myContext
cl_device_id device, // I want the MY_DEVICE'th device of myDevices.
const cl_queue_properties properties, // The default properties should suffice.
cl_int *errcode_ret // err
)
*/
cl_command_queue myCommandQueue = clCreateCommandQueueWithProperties(myContext, myDevices[MY_DEVICE], NULL, &err);
if(err != 0) fprintf(stderr, "\nCould not open command queue.\nOpenCL failed with exit code %d\n\n", err);
/* Reference:
cl_program clCreateProgramWithSource(
cl_context context, // myContext
cl_uint count, // There's only 1 source code.
const char **strings, // myCode, passed as a length-1 array.
const size_t *lengths, // Passing NULL indicates that all the strings are null-terminated.
cl_int *errcode_ret // err
)
*/
cl_program myProgram = clCreateProgramWithSource(myContext, 1, (const char**)&myCode, NULL, &err);
if(err != 0) fprintf(stderr, "\nCould not create program.\nOpenCL failed with exit code %d\n\n", err);
/* Reference:
cl_int clBuildProgram(
cl_program program, // myProgram
const cl_device_id *device_list, // myDevices
const char *options, // This can be NULL, right? The docs didn't specify.
void *pfn_notify, // No need for this.
void *user_data // No need for this.
)
*/
err = clBuildProgram(myProgram, MY_DEVICE + 1, myDevices, NULL, NULL, NULL);
if(err != 0) {
fprintf(stderr, "\nCould not build program.\nOpenCL failed with exit code %d\n\n", err);
char *errLog;
size_t errLen;
clGetProgramBuildInfo(myProgram, myDevices[MY_DEVICE], CL_PROGRAM_BUILD_LOG, 0, NULL, &errLen);
errLog = (char*)malloc((errLen + 1) * sizeof(char));
clGetProgramBuildInfo(myProgram, myDevices[MY_DEVICE], CL_PROGRAM_BUILD_LOG, errLen, errLog, NULL);
errLog[errLen] = 0;
fprintf(stderr, "\nFull Build Log:\n%s\n\n", errLog);
}
/* Reference:
cl_kernel clCreateKernel(
cl_program program, // myProgram
const char *kernel_name, // "fourtyTwo" (See above, definition of myCode)
cl_int *errcode_ret // err
)
*/
cl_kernel myKernel = clCreateKernel(myProgram, "fourtyTwo", &err);
if(err != 0) fprintf(stderr, "\nCould not create kernel.\nOpenCL failed with exit code %d\n\n", err);
size_t *globalSize = (size_t*)malloc(sizeof(size_t));
*globalSize = 1; // There is only 1 item.
size_t *localSize = (size_t*)malloc(sizeof(size_t));
*localSize = 1; // There can only be 1 out of 1 item.
int *outputArr = (int*)malloc(1 * sizeof(int));
/* Reference:
cl_mem clCreateBuffer(
cl_context context, // myContext
cl_mem_flags flags, // I'm only writing to the output array.
size_t size, // Its only 1 integer.
void *host_ptr, // I haven't allocated this space yet.
cl_int *errcode_ret // err
)
*/
cl_mem outputBuffer = clCreateBuffer(myContext, CL_MEM_WRITE_ONLY, 1 * sizeof(int), NULL, &err);
if(err != 0) fprintf(stderr, "\nCould not create buffer.\nOpenCL failed with exit code %d\n\n", err);
/* Reference:
cl_int clSetKernelArg(
cl_kernel kernel, // myKernel
cl_uint arg_index, // Setting the 1st argument.
size_t arg_size, // Passing 1 cl_mem object.
const void *arg_value // The argument shall be the output buffer for the kernel to write to.
)
*/
err = clSetKernelArg(myKernel, 0, sizeof(cl_mem), (void*)outputBuffer);
if(err != 0) fprintf(stderr, "\nCould not set kernel argument.\nOpenCL failed with exit code %d\n\n", err);
/* Reference:
cl_int clEnqueueNDRangeKernel(
cl_command_queue command_queue, // myCommandQueue
cl_kernel kernel, // myKernel
cl_uint work_dim, // 1 dimensional.
const size_t *global_work_offset, // Don't offset anything.
const size_t *global_work_size, // globalSize
const size_t *local_work_size, // localSize
cl_uint num_events_in_wait_list, // I don't have a wait list (whatever that is).
const cl_event *event_wait_list, // I don't have a wait list (whatever that is).
cl_event *event // Don't create an event (whatever this means).
)
*/
err = clEnqueueNDRangeKernel(myCommandQueue, myKernel, 1, 0, globalSize, localSize, 0, NULL, NULL);
if(err != 0) fprintf(stderr, "\nCould not run kernel.\nOpenCL failed with exit code %d\n\n", err);
/* Reference:
cl_int clEnqueueReadBuffer(
cl_command_queue command_queue, // myCommandQueue
cl_mem buffer, // outputBuffer
cl_bool blocking_read, // I'm doing everything synchronously.
size_t offset, // Don't offset anything.
size_t cb, // Reading in 1 integer.
void *ptr, // Putting the data into outputArr.
cl_uint num_events_in_wait_list, // I don't have a wait list (whatever that is).
const cl_event *event_wait_list, // I don't have a wait list (whatever that is).
cl_event *event // Don't create an event (whatever this means).
)
*/
clEnqueueReadBuffer(myCommandQueue, outputBuffer, CL_TRUE, 0, 1 * sizeof(cl_int), outputArr, 0, NULL, NULL);
if(err != 0) fprintf(stderr, "\nCould not read from buffer.\nOpenCL failed with exit code %d\n\n", err);
// Print the result
printf("%d\n", outputArr[0]);
return 0;
}
The program does not work. It leaves the buffer unchanged and throws an error through oclgrind.
Here is the output:
gcc main.c -o main -lOpenCL
./main
Invalid write of size 4 at global memory address 0x560d0b2cefc0
Kernel: fourtyTwo
Entity: Global(0,0,0) Local(0,0,0) Group(0,0,0)
store i32 42, i32 addrspace(1)* %arrayidx, align 4, !dbg !23, !tbaa !24
At line 3 (column 15) of input.cl:
output[i] = 42;
0
Press ENTER or type command to continue
In case this is a system issue, here is the output of clinfo.
I'm using an AMD Ryzen 7 PRO 4750U with Radeon Graphics (16) @ 1.7000Ghz.
I'm running Arch Linux.
Does anyone have a clue as to what the issue could be? Thanks!
P.S., any criticism of my code or my question wording, whether civil or not, is welcomed by me!
I am a buffoon. The error was on the line I went to set the kernel argument:
err = clSetKernelArg(myKernel, 0, sizeof(cl_mem), (void*)outputBuffer);
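I typed `(void*)outputBuffer` when I should've written `(void*)&outputBuffer`. clSetKernelArg expects a pointer to the argument value (here, a pointer to the cl_mem handle), not the handle itself. As a result of this typo, the kernel was trying to write to some arbitrary location instead of the actual position in memory of the buffer.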