c++oopinheritancecudapolymorphism

Polymorphism alternatives when working with CUDA in C++


Edit: From the replies, I realized that what I really need is an alternative to virtual class methods in CUDA C++. I believe the solution proposed are better suited for my use case than the one proposed in How to implement device side CUDA virtual functions?, especially due to having to handle a vector of Virtual classes.

Original question: I am trying to write a path tracer in C++ using CUDA, I am now at a point in which I am trying to pass an array of Shapes to the render function but even though these have been copied to device memory, I get an illegal memory access when trying to access methods of the shapes. I might be something to do with the way I am structuring the classes and their inheritance.

I have a host function render that accepts a scene dscriptor and an image array

__host__ void render(const std::shared_ptr<Scene> &scene, uchar4 *image);

this function calls a kernel renderImage which accepts a number of parameters and in particular my array of Shapes

__global__ void renderImage(const uint16_t width, const uint16_t height,
                            uchar4 *image, const Vec3 origin,
                            const Vec3 pixel00, const Vec3 deltaU,
                            const Vec3 deltaV, const Shape **shapes,
                            const size_t num_shapes);

A shape is defined as

// shape.cuh
#pragma once

#include "cuda_path_tracer/ray.cuh"

class Shape {
public:
  Shape() = default;
  Shape(const Shape &) = default;
  __host__ __device__ Shape(Shape &&) = delete;
  auto operator=(const Shape &) -> Shape & = default;
  __host__ __device__ auto operator=(Shape &&) -> Shape & = delete;
  virtual ~Shape() = default;
  __host__ __device__ virtual auto hit(const Ray &r) const -> bool = 0;
  __host__ __device__ virtual auto getShapeType() const -> ShapeType = 0;
};

A shape derived from the abstract class looks like this:

// sphere.cuh
#pragma once

#include "shape.cuh"

class Sphere : public Shape {
public:
  __host__ __device__ Sphere(const Vec3 &center, float radius);
  __host__ __device__ auto hit(const Ray &r) const -> bool override;
  __host__ __device__ auto hitt() const -> bool override;
  __host__ __device__ auto getShapeType() const -> ShapeType override;

private:
  Vec3 center;
  float radius;
};

and is implemented like this

// sphere.cu
#include "cuda_path_tracer/sphere.cuh"

__host__ Sphere::Sphere(const Vec3 &center, const float radius)
    : Shape(), center(center), radius(radius) {}

__host__ __device__ auto Sphere::hit(const Ray &r) const -> bool {
  Vec3 const oc = r.getOrigin() - center;
  float const a = r.getDirection().dot(r.getDirection());
  float const b = 2.0f * oc.dot(r.getDirection());
  float const c = oc.dot(oc) - radius * radius;
  float const discriminant = b * b - 4 * a * c;
  return discriminant > 0;
}

Shapes are copied in device memory in the host's render function in the following way

const auto num_shapes = scene->getShapes().size();
const Shape **d_shapes;
CUDA_ERROR_CHECK(
    cudaMalloc((void **)&d_shapes, num_shapes * sizeof(Shape *)));

Shape **h_shapes = new Shape *[num_shapes];

for (size_t i = 0; i < num_shapes; i++) {
  CUDA_ERROR_CHECK(cudaMalloc((void **)&h_shapes[i], sizeof(Shape)));
  CUDA_ERROR_CHECK(cudaMemcpy(h_shapes[i], scene->getShapes()[i],
                              sizeof(Shape), cudaMemcpyHostToDevice));
}
CUDA_ERROR_CHECK(cudaMemcpy(d_shapes, h_shapes, num_shapes * sizeof(Shape *),
                            cudaMemcpyHostToDevice));
delete[] h_shapes;

Inside of the renderImage kernel I have two device functions that are being called, one to get a ray corresponding to a pixel and one for the color corresponding to the ray, given the array of shapes, the getColor function looks like this

__device__ auto getColor(const Ray &ray, const Shape *const *shapes,
                         const size_t num_shapes) -> uchar4 {
  // Dummy implementation
  for (size_t i = 0; i < num_shapes; i++) {
    if (shapes[i]->hit(ray)) {
      return make_uchar4(1, 0, 0, UCHAR_MAX);
    }
  }
  return make_uchar4(0, 0, 1, UCHAR_MAX);
}

the problem is the shapes[i]->hit(ray) generates an Illegal memory access exception, why is that? And how can I fix it?


Solution

  • Thanks to the suggestion in the comments and in particular the tip of @Homer512 I have come to this solution, please tell me if you think it could be done better.

    // shape.cuh
    #pragma once
    
    #include <cuda/std/variant>
    #include "sphere.cuh"
    
    using Shape = cuda::std::variant<Sphere>;
    
    //sphere.cuh
    #pragma once
    
    #include "ray.cuh"
    #include "vec3.cuh"
    
    class Sphere {
    public:
      __host__ __device__ Sphere(const Vec3 &center, float radius);
      __device__ auto hit(const Ray &r) const -> bool;
    
    private:
      Vec3 center;
      float radius;
    };
    

    h_shapes initialized as

    const std::vector<Shape> &h_shapes = scene->getShapes();
    const size_t num_shapes = h_shapes.size();
    Shape *d_shapes;
    CUDA_ERROR_CHECK(cudaMalloc((void **)&d_shapes, num_shapes * sizeof(Shape)));
    CUDA_ERROR_CHECK(cudaMemcpy(d_shapes, h_shapes.data(),
                                num_shapes * sizeof(Sphere),
                                cudaMemcpyHostToDevice));
    

    and finally the device function to get the color

    __device__ auto getColor(const Ray &ray, const ShapeD *shapes,
                             const size_t num_shapes) -> uchar4 {
      for (size_t i = 0; i < num_shapes; i++) {
        bool hit = cuda::std::visit(
            [&ray](const auto &shape) { return shape.hit(ray); }, shapes[i]);
    
        if (hit) {
          return make_uchar4(1, 0, 0, UCHAR_MAX);
        }
      }
      return make_uchar4(0, 0, 1, UCHAR_MAX);
    }