cstructgpgpuopenaccpgcc

openacc error when assigning values to dynamically allocated struct member array of struct referenced by pointer


I am trying to wrap my head around combining openacc with pointers to structs containing dynamically allocated members. The code below fails with

Failing in Thread:1 call to cuStreamSynchronize returned error 700: Illegal address during kernel execution

when compiled using nvc ("nvc 20.9-0 LLVM 64-bit target on x86-64 Linux -tp haswell"). As far as I can tell I am following the approach suggested eg in the OpenACC 'getting started' guide. But somehow presumably the pointers don't stick (?) on the device. Does anyone know what goes wrong here?

#include <stdlib.h>
#include <stdio.h>

typedef struct grid
{
  int N;
  double *X;
} grid;

void allocate(grid* g, int N)
{
  g->N = N;
  g->X = (double*) malloc(sizeof(double) * g->N);

  #pragma acc enter data create(g[0:1])
  #pragma acc enter data create(g->X[0:N])
}

void release(grid* g)
{
  #pragma acc exit data delete(g->X[0:g->N])
  #pragma acc exit data delete(g[0:1])

  free(g->X);
}

void fill(grid * g)
{
  int i;

  #pragma acc parallel loop
  for (i = 0; i < g->N; i++)
  {
    g->X[i] = 42; // the cuprit, commenting this removes the error too
  }
}

int main()
{
  grid g;

  allocate(&g, 10);

  fill(&g);

  release(&g);

  return 0;
}```

Solution

  • From the compiler feedback messages you'll see something like:

         fill:
              32, Accelerator restriction: size of the GPU copy of g is unknown
                  Generating Tesla code
                  32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
              32, Generating implicit copyin(g) [if not already present]
              37, Generating update self(g->X[:g->N])
    

    The problem being that the compiler can't implicitly copy aggregate types with dynamic data members so you need to add a "present(g)" to indicate that g is already the device.

    Also, you'll want to copyin g in order to get the value of N on the device and no need to include the array shape in the exit data delete directive. For example:

    % cat test.c
    #include <stdlib.h>
    #include <stdio.h>
    
    typedef struct grid
    {
      int N;
      double *X;
    } grid;
    
    void allocate(grid* g, int N)
    {
      g->N = N;
      g->X = (double*) malloc(sizeof(double) * g->N);
    
      #pragma acc enter data copyin(g[0:1])
      #pragma acc enter data create(g->X[0:N])
    }
    
    void release(grid* g)
    {
      #pragma acc exit data delete(g->X)
      #pragma acc exit data delete(g)
    
      free(g->X);
    }
    
    void fill(grid * g)
    {
      int i;
    
      #pragma acc parallel loop present(g)
      for (i = 0; i < g->N; i++)
      {
        g->X[i] = 42; // the cuprit, commenting this removes the error too
      }
      #pragma acc update self(g->X[:g->N])
      for (i = 0; i < 4; i++)
      {
        printf("%d : %f \n",i,g->X[i]);
      }
    }
    
    int main()
    {
      grid g;
    
      allocate(&g, 10);
    
      fill(&g);
    
      release(&g);
    
      return 0;
    }
    
    % nvc -acc test.c -Minfo=accel -V20.9 ; a.out
    allocate:
         17, Generating enter data copyin(g[:1])
             Generating enter data create(g->X[:N])
    release:
         24, Generating exit data delete(g[:1],g->X[:1])
    fill:
         32, Generating present(g[:1])
             Generating Tesla code
             32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
         37, Generating update self(g->X[:g->N])
    0 : 42.000000
    1 : 42.000000
    2 : 42.000000
    3 : 42.000000