javaccudagpujcuda

Calculate Skintone using JCuda is not giving the right percentage


Im calculting skintone of an image in java.

  1. convert the pixel of Image in yCbCR.
  2. check if image pixel is in specific range, then its a skin color.
  3. calculate percentage by dividing it by total pixel.

Its working fine in CPU code, but when i convert it to GPU code, The pixel percentage is not coming right.

The confusing part for me was send the pixel data to GPU and get its r, g, b value in GPU.

So i follow JCuda Pixel Invert Example example to send pixel data. The difference is the example send pixel data in int[] array and I'm sending it in byte[] array.

Here the code.

import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;

import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.awt.image.Raster;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

import ij.IJ;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;

public class SkinTone {

public static void CalculateSKintoneGPU(File file) throws IOException {
    BufferedImage bufferedImage = ImageIO.read(file);
    if (bufferedImage == null || bufferedImage.getData() == null)
        return;
    Raster raster = bufferedImage.getData();

    DataBuffer dataBuffer = raster.getDataBuffer();
    DataBufferByte dataBufferInt = (DataBufferByte)dataBuffer;
    byte[] pixels =  dataBufferInt.getData();

    int totalPixels = raster.getHeight() * raster.getWidth();

    CUfunction kernelFunction = initlize();

    int output[] = execute(kernelFunction, pixels, raster.getWidth(), raster.getHeight());
    // Flushing memory
    raster = null;
    bufferedImage.flush();
    bufferedImage = null;

    long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

    System.err.println("Skintone Using GPU: " + output[0]);
    System.err.println("Total Pixel Of GPU: " + totalPixels);
    System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
}

static int[] execute(CUfunction kernelFunction, byte[] pixels, int w, int h) {
    // Allocate memory on the device, and copy the host data to the device
    int size = w * h * Sizeof.BYTE;
    CUdeviceptr pointer = new CUdeviceptr();
    cuMemAlloc(pointer, size);
    cuMemcpyHtoD(pointer, Pointer.to(pixels), size);

    int numElements = 9;
    int s = 0;
    // Allocate device output memory
    CUdeviceptr deviceOutput = new CUdeviceptr();
    cuMemAlloc(deviceOutput, numElements * Sizeof.INT);

    // Set up the kernel parameters: A pointer to an array
    // of pointers which point to the actual values.
    Pointer kernelParameters = Pointer.to(Pointer.to(pointer), Pointer.to(new int[] { w }),
            Pointer.to(new int[] { h }), Pointer.to(deviceOutput));

    // Call the kernel function
    int blockSize = 16;
    int gridSize = (Math.max(w, h) + blockSize - 1) / blockSize;
    cuLaunchKernel(kernelFunction, gridSize, gridSize, 1, // Grid dimension
            blockSize, blockSize, 1, // Block dimension
            0, null, // Shared memory size and stream
            kernelParameters, null // Kernel- and extra parameters
    );
    cuCtxSynchronize();

    // Allocate host output memory and copy the device output
    // to the host.
    int hostOutput[] = new int[numElements];
    cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput, numElements * Sizeof.INT);

    // Clean up.
    cuMemFree(deviceOutput);
    cuMemFree(pointer);

    return hostOutput;
}

public static CUfunction initlize() {

    // Enable exceptions and omit all subsequent error checks
    JCudaDriver.setExceptionsEnabled(true);
    JNvrtc.setExceptionsEnabled(true);

    // Initialize the driver and create a context for the first device.
    cuInit(0);
    CUdevice device = new CUdevice();
    cuDeviceGet(device, 0);
    CUcontext context = new CUcontext();
    cuCtxCreate(context, 0, device);

    // Obtain the CUDA source code from the CUDA file
    String cuFileName = "Skintone.cu";
    String sourceCode = CudaUtils.readResourceAsString(cuFileName);
    if (sourceCode == null) {
        IJ.showMessage("Error", "Could not read the kernel source code");
    }

    // Create the kernel function
    return CudaUtils.createFunction(sourceCode, "skintone");
}

public static void CalculateSKintoneCPU(File file) throws IOException {
    BufferedImage bufferedImage = ImageIO.read(file);
    if (bufferedImage == null || bufferedImage.getData() == null)
        return;
    Raster raster = bufferedImage.getData();
    float[] rgb = new float[4];
    int totalPixels = raster.getHeight() * raster.getWidth();

    int skinTonePixels = 0;

    for (int x = 0; x < raster.getWidth(); x++) {
        for (int y = 0; y < raster.getHeight(); y++) {
            raster.getPixel(x, y, rgb);
            if (skintone(rgb)) {
                skinTonePixels++;
            }
        }
    }

    // Flushing memory
    raster = null;
    rgb = null;
    bufferedImage.flush();
    bufferedImage = null;

    long skintoneThreshold = Math.round(skinTonePixels / (double) totalPixels * 100.0);

    System.err.println("Skintone Using CPU: " + skinTonePixels);
    System.err.println("Total Pixel Of CPU: " + totalPixels);
    System.err.println("SKinTone Percentage Using CPU: " + skintoneThreshold + "%");
}

private static boolean skintone(float[] rgb) {
    float yCbCr[] = (float[]) convertRGBtoYUV(rgb);
    if ((yCbCr[1] >= 80 && yCbCr[1] <= 120) && (yCbCr[2] >= 133 && yCbCr[2] <= 173)) {
        return true;
    }
    return false;
}

private static float[] convertRGBtoYUV(float[] rgb) {
    final float[] yCbCr = new float[3];
    float r = rgb[0];
    float g = rgb[1];
    float b = rgb[2];

    yCbCr[0] = 16 + (0.299f * r) + (0.587f * g) + (0.144f * b);
    yCbCr[1] = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    yCbCr[2] = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    return yCbCr;
}

public static void main(String[] args) throws IOException {
    File file = new File("C:\\Users\\Aqeel\\git\\jcuda-imagej-example\\src\\test\\resources\\lena512color.png");
    CalculateSKintoneCPU(file);
    CalculateSKintoneGPU(file);
}

}

Kernal File

    extern "C"
__global__ void skintone(uchar4* data, int w, int h, int* output)
{
    int x = threadIdx.x+blockIdx.x*blockDim.x;
    int y = threadIdx.y+blockIdx.y*blockDim.y;

if (x < w && y < h)
{
    float r, g, b;
    float cb, cr;

    int index = y*w+x;
    uchar4 pixel = data[index];

    r = pixel.x;
    g = pixel.y;
    b = pixel.z;

    cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);


    if((cb >= 80 &&  cb <= 120) && (cr >= 133 &&  cr <= 173)) {
        atomicAdd(&output[0], 1);
    }
}
}

Complete Example src, Machine Need Nvida Card, Cuda Toolkit V9 and Graphics Drivers


Solution

  • I solve the problem by hit and trial method. In the kernel i change the position of r with b, and the problem resolved, also instead of byte i have to send the code in int array in java.

    extern "C"
    __global__ void skintone(uchar4* data, int w, int h, int* output)
    {
        int x = threadIdx.x+blockIdx.x*blockDim.x;
        int y = threadIdx.y+blockIdx.y*blockDim.y;
    
    if (x < w && y < h)
    {
        float b, g, r;
        float cb, cr;
    
        int index = y*w+x;
        uchar4 pixel = data[index];
    
        b = (float)pixel.x;
        g = (float)pixel.y;
        r = (float)pixel.z;
    
        cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
        cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);
    
    
        if((cb >= 80 &&  cb <= 120) && (cr >= 133 &&  cr <= 173)) {
            atomicAdd(&output[0], 1);
        }
    }
    }
    

    Java Code Changes.

    public static void calculateSkintoneGPU() throws IOException {
        BufferedImage img = ImageIO.read(SkinTone.class.getClassLoader().getResource("images.jpg"));
        if (img == null || img.getData() == null)
            return;
    
        int width = img.getWidth(null);
        int height = img.getHeight(null);
        int[] pixels = new int[width * height];
        PixelGrabber pg = new PixelGrabber(img, 0, 0, width, height, pixels, 0, width);
        try {
            pg.grabPixels();
        } catch (InterruptedException e){};
    
        int totalPixels = width * height;
    
        CUfunction kernelFunction = initlize();
    
        int output[] = execute(kernelFunction, pixels, width, height);
        // Flushing memory
        img.flush();
        img = null;
    
        long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);
    
        System.err.println("Skintone Using GPU: " + output[0]);
        System.err.println("Total Pixel Of GPU: " + totalPixels);
        System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
    }