Tags: c, opencl, opencl-c

OpenCL: Invalid write of size # at global memory address 0x###


Post-solution edit: The issue is with the code alone. There is no hardware issue here. Now to the original post:

I'm trying to get a basic OpenCL program to work.

The program simply creates a buffer, runs a kernel that writes 42 into it, reads the buffer back to the host, and prints the value.

Here's the code, written in C:

// Zero-based indices that main() uses to choose the OpenCL platform and the
// device on that platform. The program asks OpenCL for index + 1 entries, so
// the chosen platform/device must actually exist on the machine.
#define MY_PLATFORM 2 // Adjustable: which platform to run on
#define MY_DEVICE 0 // Adjustable: which device of that platform to run on

#include <stdio.h>
#include <stdlib.h>

#define CL_TARGET_OPENCL_VERSION 200
#include <CL/cl.h>

/* OpenCL C source of the `fourtyTwo` kernel: every work-item stores 42 into
 * the element of `output` selected by its global id in dimension 0.
 * Written as adjacent string literals, one per kernel source line. */
const char *myCode =
    "     __kernel void fourtyTwo(__global int *output) { \n"
    "         int i = get_global_id(0); \n"
    "         output[i] = 42; \n"
    "     } \n"
    " \0";

int main(void) {
    cl_platform_id *myPlatforms = (cl_platform_id*)malloc(sizeof(cl_platform_id));
    cl_uint *myPlatformCount = (cl_uint*)malloc(sizeof(cl_uint));

    cl_device_id *myDevices = (cl_device_id*)malloc(sizeof(cl_device_id));
    cl_uint *myDeviceCount = (cl_uint*)malloc(sizeof(cl_uint));

    int err;

    /* Reference:
        cl_int clGetPlatformIDs(
            cl_uint num_entries,       // I want just enough to reach MY_PLATFORM.
            cl_platform_id *platforms, // myPlatforms
            cl_uint *num_platforms     // myPlatformCount
        )
    */
    err = clGetPlatformIDs(MY_PLATFORM + 1, myPlatforms, myPlatformCount);
    if(err != 0) fprintf(stderr, "\nCould not query platforms.\nOpenCL failed with exit code %d\n\n", err);

    /* Reference:
        cl_int clGetDeviceIDs(
            cl_platform_id num_entries, // I want the MY_PLATFORM'th entry of myPlatforms.
            cl_device_type device_type, // Any device will do.
            cl_uint num_entries,        // I want just enough to reach MY_DEVICE.
            cl_device_id *devices,      // myDevices
            cl_uint *num_devices        // myDeviceCount
        )
    */
    err = clGetDeviceIDs(myPlatforms[MY_PLATFORM], CL_DEVICE_TYPE_ALL, MY_DEVICE + 1, myDevices, myDeviceCount);
    if(err != 0) fprintf(stderr, "\nCould not query devices.\nOpenCL failed with exit code %d\n\n", err);

    /* Reference:
        cl_context clCreateContext(
            cl_context_properties *properties, // The default properties should suffice.
            cl_uint num_devices,               // I want just enough to reach MY_DEVICE.
            const cl_device_id *devices,       // myDevices
            void *pfn_notify,                  // No need for this.
            void *user_data,                   // No need for this.
            cl_int *errcode_ret                // err
        )
    */
    cl_context myContext = clCreateContext(NULL, MY_DEVICE + 1, myDevices, NULL, NULL, &err);
    if(err != 0) fprintf(stderr, "\nCould not open context.\nOpenCL failed with exit code %d\n\n", err);

    /* Reference:
        cl_command_queue clCreateCommandQueueWithProperties(
            cl_context context,                   // myContext
            cl_device_id device,                  // I want the MY_DEVICE'th device of myDevices.
            const cl_queue_properties properties, // The default properties should suffice.
            cl_int *errcode_ret                   // err
        )
    */
    cl_command_queue myCommandQueue = clCreateCommandQueueWithProperties(myContext, myDevices[MY_DEVICE], NULL, &err);
    if(err != 0) fprintf(stderr, "\nCould not open command queue.\nOpenCL failed with exit code %d\n\n", err);

    /* Reference:
        cl_program clCreateProgramWithSource(
            cl_context context,    // myContext
            cl_uint count,         // There's only 1 source code.
            const char **strings,  // myCode, passed as a length-1 array.
            const size_t *lengths, // Passing NULL indicates that all the strings are null-terminated.
            cl_int *errcode_ret    // err
        )
    */
    cl_program myProgram = clCreateProgramWithSource(myContext, 1, (const char**)&myCode, NULL, &err);
    if(err != 0) fprintf(stderr, "\nCould not create program.\nOpenCL failed with exit code %d\n\n", err);

    /* Reference:
        cl_int clBuildProgram(
            cl_program program,              // myProgram
            const cl_device_id *device_list, // myDevices
            const char *options,             // This can be NULL, right? The docs didn't specify.
            void *pfn_notify,                // No need for this.
            void *user_data                  // No need for this.
        )
    */
    err = clBuildProgram(myProgram, MY_DEVICE + 1, myDevices, NULL, NULL, NULL);
    if(err != 0) {
        fprintf(stderr, "\nCould not build program.\nOpenCL failed with exit code %d\n\n", err);
        char *errLog;
        size_t errLen;
        clGetProgramBuildInfo(myProgram, myDevices[MY_DEVICE], CL_PROGRAM_BUILD_LOG, 0, NULL, &errLen);
        errLog = (char*)malloc((errLen + 1) * sizeof(char));
        clGetProgramBuildInfo(myProgram, myDevices[MY_DEVICE], CL_PROGRAM_BUILD_LOG, errLen, errLog, NULL);
        errLog[errLen] = 0;
        fprintf(stderr, "\nFull Build Log:\n%s\n\n", errLog);
    }

    /* Reference:
        cl_kernel clCreateKernel(
            cl_program program,      // myProgram
            const char *kernel_name, // "fourtyTwo" (See above, definition of myCode)
            cl_int *errcode_ret      // err
        )
    */
    cl_kernel myKernel = clCreateKernel(myProgram, "fourtyTwo", &err);
    if(err != 0) fprintf(stderr, "\nCould not create kernel.\nOpenCL failed with exit code %d\n\n", err);

    size_t *globalSize = (size_t*)malloc(sizeof(size_t));
    *globalSize = 1; // There is only 1 item.
    size_t *localSize = (size_t*)malloc(sizeof(size_t));
    *localSize = 1; // There can only be 1 out of 1 item.

    int *outputArr = (int*)malloc(1 * sizeof(int));

    /* Reference:
        cl_mem clCreateBuffer(
            cl_context context, // myContext
            cl_mem_flags flags, // I'm only writing to the output array.
            size_t size,        // Its only 1 integer.
            void *host_ptr,     // I haven't allocated this space yet.
            cl_int *errcode_ret // err
        )
    */
    cl_mem outputBuffer = clCreateBuffer(myContext, CL_MEM_WRITE_ONLY, 1 * sizeof(int), NULL, &err);
    if(err != 0) fprintf(stderr, "\nCould not create buffer.\nOpenCL failed with exit code %d\n\n", err);

    /* Reference:
        cl_int clSetKernelArg(
            cl_kernel kernel,     // myKernel
            cl_uint arg_index,    // Setting the 1st argument.
            size_t arg_size,      // Passing 1 cl_mem object.
            const void *arg_value // The argument shall be the output buffer for the kernel to write to.
        )
    */
    err = clSetKernelArg(myKernel, 0, sizeof(cl_mem), (void*)outputBuffer);
    if(err != 0) fprintf(stderr, "\nCould not set kernel argument.\nOpenCL failed with exit code %d\n\n", err);

    /* Reference:
        cl_int clEnqueueNDRangeKernel(
            cl_command_queue command_queue,   // myCommandQueue
            cl_kernel kernel,                 // myKernel
            cl_uint work_dim,                 // 1 dimensional.
            const size_t *global_work_offset, // Don't offset anything.
            const size_t *global_work_size,   // globalSize
            const size_t *local_work_size,    // localSize
            cl_uint num_events_in_wait_list,  // I don't have a wait list (whatever that is).
            const cl_event *event_wait_list,  // I don't have a wait list (whatever that is).
            cl_event *event                   // Don't create an event (whatever this means).
        )
    */
    err = clEnqueueNDRangeKernel(myCommandQueue, myKernel, 1, 0, globalSize, localSize, 0, NULL, NULL);
    if(err != 0) fprintf(stderr, "\nCould not run kernel.\nOpenCL failed with exit code %d\n\n", err);

    /* Reference:
        cl_int clEnqueueReadBuffer(
            cl_command_queue command_queue,  // myCommandQueue
            cl_mem buffer,                   // outputBuffer
            cl_bool blocking_read,           // I'm doing everything synchronously.
            size_t offset,                   // Don't offset anything.
            size_t cb,                       // Reading in 1 integer.
            void *ptr,                       // Putting the data into outputArr.
            cl_uint num_events_in_wait_list, // I don't have a wait list (whatever that is).
            const cl_event *event_wait_list, // I don't have a wait list (whatever that is).
            cl_event *event                  // Don't create an event (whatever this means).
        )
    */
    clEnqueueReadBuffer(myCommandQueue, outputBuffer, CL_TRUE, 0, 1 * sizeof(cl_int), outputArr, 0, NULL, NULL);
    if(err != 0) fprintf(stderr, "\nCould not read from buffer.\nOpenCL failed with exit code %d\n\n", err);

    // Print the result
    printf("%d\n", outputArr[0]);
    return 0;
}

The program does not work. It leaves the buffer unchanged and throws an error through oclgrind.

Here is the output:

gcc main.c -o main -lOpenCL
./main

Invalid write of size 4 at global memory address 0x560d0b2cefc0
        Kernel: fourtyTwo
        Entity: Global(0,0,0) Local(0,0,0) Group(0,0,0)
          store i32 42, i32 addrspace(1)* %arrayidx, align 4, !dbg !23, !tbaa !24
        At line 3 (column 15) of input.cl:
          output[i] = 42;

0

Press ENTER or type command to continue

In case this is a system issue, here is the output of clinfo.

I'm using an AMD Ryzen 7 PRO 4750U with Radeon Graphics (16) @ 1.7000 GHz.

I'm running Arch Linux.

Does anyone have a clue as to what the issue could be? Thanks!

P.S., any criticism of my code or my question wording, whether civil or not, is welcomed by me!


Solution

  • I am a buffoon. The error was on the line where I set the kernel argument:

        err = clSetKernelArg(myKernel, 0, sizeof(cl_mem), (void*)outputBuffer);
    

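    I typed (void*)outputBuffer when I should've written (void*)&outputBuffer. clSetKernelArg expects a pointer to the argument value (here, a pointer to the cl_mem handle), not the handle itself. As a result of this typo, OpenCL copied sizeof(cl_mem) bytes from wherever the handle points and treated them as the buffer, so the kernel was trying to write to some arbitrary location instead of the actual position in memory of the buffer.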