Edit: From the replies, I realized that what I really need is an alternative to virtual class methods in CUDA C++. I believe the solution proposed are better suited for my use case than the one proposed in How to implement device side CUDA virtual functions?, especially due to having to handle a vector of Virtual classes.
Original question: I am trying to write a path tracer in C++ using CUDA, I am now at a point in which I am trying to pass an array of Shapes to the render
function but even though these have been copied to device memory, I get an illegal memory access when trying to access methods of the shapes. I might be something to do with the way I am structuring the classes and their inheritance.
I have a host function render
that accepts a scene dscriptor and an image array
__host__ void render(const std::shared_ptr<Scene> &scene, uchar4 *image);
this function calls a kernel renderImage
which accepts a number of parameters and in particular my array of Shapes
__global__ void renderImage(const uint16_t width, const uint16_t height,
uchar4 *image, const Vec3 origin,
const Vec3 pixel00, const Vec3 deltaU,
const Vec3 deltaV, const Shape **shapes,
const size_t num_shapes);
A shape is defined as
// shape.cuh
#pragma once
#include "cuda_path_tracer/ray.cuh"
class Shape {
public:
Shape() = default;
Shape(const Shape &) = default;
__host__ __device__ Shape(Shape &&) = delete;
auto operator=(const Shape &) -> Shape & = default;
__host__ __device__ auto operator=(Shape &&) -> Shape & = delete;
virtual ~Shape() = default;
__host__ __device__ virtual auto hit(const Ray &r) const -> bool = 0;
__host__ __device__ virtual auto getShapeType() const -> ShapeType = 0;
};
A shape derived from the abstract class looks like this:
// sphere.cuh
#pragma once
#include "shape.cuh"
class Sphere : public Shape {
public:
__host__ __device__ Sphere(const Vec3 ¢er, float radius);
__host__ __device__ auto hit(const Ray &r) const -> bool override;
__host__ __device__ auto hitt() const -> bool override;
__host__ __device__ auto getShapeType() const -> ShapeType override;
private:
Vec3 center;
float radius;
};
and is implemented like this
// sphere.cu
#include "cuda_path_tracer/sphere.cuh"
__host__ Sphere::Sphere(const Vec3 ¢er, const float radius)
: Shape(), center(center), radius(radius) {}
__host__ __device__ auto Sphere::hit(const Ray &r) const -> bool {
Vec3 const oc = r.getOrigin() - center;
float const a = r.getDirection().dot(r.getDirection());
float const b = 2.0f * oc.dot(r.getDirection());
float const c = oc.dot(oc) - radius * radius;
float const discriminant = b * b - 4 * a * c;
return discriminant > 0;
}
Shapes are copied in device memory in the host's render
function in the following way
const auto num_shapes = scene->getShapes().size();
const Shape **d_shapes;
CUDA_ERROR_CHECK(
cudaMalloc((void **)&d_shapes, num_shapes * sizeof(Shape *)));
Shape **h_shapes = new Shape *[num_shapes];
for (size_t i = 0; i < num_shapes; i++) {
CUDA_ERROR_CHECK(cudaMalloc((void **)&h_shapes[i], sizeof(Shape)));
CUDA_ERROR_CHECK(cudaMemcpy(h_shapes[i], scene->getShapes()[i],
sizeof(Shape), cudaMemcpyHostToDevice));
}
CUDA_ERROR_CHECK(cudaMemcpy(d_shapes, h_shapes, num_shapes * sizeof(Shape *),
cudaMemcpyHostToDevice));
delete[] h_shapes;
Inside of the renderImage
kernel I have two device functions that are being called, one to get a ray corresponding to a pixel and one for the color corresponding to the ray, given the array of shapes, the getColor
function looks like this
__device__ auto getColor(const Ray &ray, const Shape *const *shapes,
const size_t num_shapes) -> uchar4 {
// Dummy implementation
for (size_t i = 0; i < num_shapes; i++) {
if (shapes[i]->hit(ray)) {
return make_uchar4(1, 0, 0, UCHAR_MAX);
}
}
return make_uchar4(0, 0, 1, UCHAR_MAX);
}
the problem is the shapes[i]->hit(ray)
generates an Illegal memory access
exception, why is that? And how can I fix it?
Thanks to the suggestion in the comments and in particular the tip of @Homer512 I have come to this solution, please tell me if you think it could be done better.
// shape.cuh
#pragma once
#include <cuda/std/variant>
#include "sphere.cuh"
using Shape = cuda::std::variant<Sphere>;
//sphere.cuh
#pragma once
#include "ray.cuh"
#include "vec3.cuh"
class Sphere {
public:
__host__ __device__ Sphere(const Vec3 ¢er, float radius);
__device__ auto hit(const Ray &r) const -> bool;
private:
Vec3 center;
float radius;
};
h_shapes
initialized as
const std::vector<Shape> &h_shapes = scene->getShapes();
const size_t num_shapes = h_shapes.size();
Shape *d_shapes;
CUDA_ERROR_CHECK(cudaMalloc((void **)&d_shapes, num_shapes * sizeof(Shape)));
CUDA_ERROR_CHECK(cudaMemcpy(d_shapes, h_shapes.data(),
num_shapes * sizeof(Sphere),
cudaMemcpyHostToDevice));
and finally the device function to get the color
__device__ auto getColor(const Ray &ray, const ShapeD *shapes,
const size_t num_shapes) -> uchar4 {
for (size_t i = 0; i < num_shapes; i++) {
bool hit = cuda::std::visit(
[&ray](const auto &shape) { return shape.hit(ray); }, shapes[i]);
if (hit) {
return make_uchar4(1, 0, 0, UCHAR_MAX);
}
}
return make_uchar4(0, 0, 1, UCHAR_MAX);
}