c++ opencv gpu halide

Halide with GPU schedule produces black image


I'm trying to learn Halide and I can't get the GPU schedule right: it generates black images when scheduled for the GPU. On the CPU it produces a good result (when I comment out brighter.gpu_tile(x, y, xo, yo, xi, yi, 8, 8);).

#include <cstdio>

#include "Halide.h"
#include "opencv2/core.hpp"
#include "opencv2/highgui.hpp"

void MinimalGpuExample() {
    cv::Mat img = cv::imread("test_in.bmp",cv::IMREAD_GRAYSCALE);

    Halide::Target target = Halide::get_host_target();
    target.set_feature(Halide::Target::CUDA);
    target.set_feature(Halide::Target::Debug);

    Halide::Buffer<uint8_t> buf(img.data, img.cols, img.rows,1);
    Halide::Func brighter;
    Halide::Var x, y, c;
    Halide::Expr value = buf(x, y, c);
    value = Halide::cast<float>(value);
    value = value * 1.5f;
    value = (Halide::min)(value, 255.0f);
    value = Halide::cast<uint8_t>(value);
    brighter(x, y, c) = value;
    Halide::Var xo, yo, xi, yi;
    brighter.gpu_tile(x, y, xo, yo, xi, yi, 8, 8);

    brighter.compile_jit(target);

    Halide::Buffer<uint8_t> output =
        brighter.realize(img.cols, img.rows, 1,target);

    output.copy_to_host();

    cv::Mat1b img_brither(img.rows, img.cols, output.data());

    cv::imwrite("test_out.bmp", img_brither);
}

// Program entry point: runs the GPU brightening example once and exits.
int main()
{
    MinimalGpuExample();
    return 0;
}

My guess is that it's something to do with the data transfer to the GPU from the buffer allocated on the host, because the following example prints correct values alongside the CUDA calls.

// Minimal GPU pipeline that takes no input buffer: evaluates f(x, y) = x + y
// over a 32x32 domain using 16x16 GPU tiles and prints the resulting grid,
// one row per line.
void MinimalGpuExampleWorking() {
    // Pipeline definition and GPU schedule.
    Halide::Func f;
    Halide::Var x, y, xo, xi, yo, yi;
    f(x, y) = x + y;
    f.gpu_tile(x, y, xo, yo, xi, yi, 16, 16);

    // JIT-compile for the host with the CUDA and Debug features enabled.
    Halide::Target target = Halide::get_host_target();
    target.set_feature(Halide::Target::CUDA);
    target.set_feature(Halide::Target::Debug);
    f.compile_jit(target);

    // Evaluate the pipeline.
    Halide::Buffer<int> result = f.realize(32, 32);

    // Dump every value; loop indices are named row/col so they do not
    // shadow the Halide Vars declared above.
    const int height = result.height();
    const int width = result.width();
    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            printf("%3d ", result(col, row));
        }
        printf("\n");
    }
}

Solution

  • Try adding a call to buf.set_host_dirty() just after you construct your input buffer. Halide can't tell whether the memory behind your pointer is uninitialized or holds actual data that needs to be copied over when the GPU allocation is made.