c++ arrays cuda unified-memory

Printing an array from unified memory on a CUDA device doesn't work


I am trying to create some hashes on a CUDA device and print them on the host. But at the printf on the host I get a read error at position 0x000000000100002F.

The relevant lines look like this:

// Host-side setup excerpt (the function continues beyond this snippet).
int main() {
// Launch geometry: num_blocks blocks of block_size threads, N threads in total.
const int block_size = 2;
const int num_blocks = 256;
const int N = block_size * num_blocks;

// Declared as a pointer-to-pointer: hashes[i] is itself an unsigned char*.
unsigned char** hashes;

// Allocates and zeroes N * 32 bytes of managed memory.
// NOTE(review): the size is computed as N 32-byte hashes, but the variable's
// type makes this storage an array of pointers; no separate byte storage for
// the hashes themselves is allocated in the code shown.
cudaMallocManaged(&hashes, N * (32 * sizeof(unsigned char)));
cudaMemset(hashes, 0, N * (32 * sizeof(unsigned char)));

On the device

// Kernel excerpt: each thread computes its flat global index and stores a
// pointer to its hash in slot idx of dhashes.
// NOTE(review): ctx and sha are declared in code not shown in this excerpt.
// No bounds check on idx is visible; the launch shown elsewhere uses exactly
// N = num_blocks * block_size threads.
__global__ void sha256_kernel(unsigned char **dhashes){

    // Flat 1D global thread index.
    int idx = blockIdx.x * blockDim.x + threadIdx.x;


    sha256_final(&ctx, sha);
    // Pointer assignment, not a byte copy: slot idx now holds whatever address
    // sha refers to.
    // NOTE(review): if sha is a per-thread local array, that address is not
    // readable from the host -- TODO confirm against the full kernel.
    dhashes[idx] = sha;
        //  printf("%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x\n", dhashes[idx][0], dhashes[idx][1], dhashes[idx][2], dhashes[idx][3], dhashes[idx][4], dhashes[idx][5], dhashes[idx][6], dhashes[idx][7], dhashes[idx][8], dhashes[idx][9], dhashes[idx][10], dhashes[idx][11], dhashes[idx][12], dhashes[idx][13], dhashes[idx][14], dhashes[idx][15],
        //      dhashes[idx][16], dhashes[idx][17], dhashes[idx][18], dhashes[idx][19], dhashes[idx][20], dhashes[idx][21], dhashes[idx][22], dhashes[idx][23], dhashes[idx][24], dhashes[idx][25], dhashes[idx][26], dhashes[idx][27], dhashes[idx][28], dhashes[idx][29], dhashes[idx][30], dhashes[idx][31]);
        // printing here is correct
    }

And back on the host side...

// Launch sha256_kernel with num_blocks blocks of block_size threads each,
// passing the managed pointer table.
sha256_kernel << < num_blocks, block_size>> > (hashes);
        // Block the host until the kernel has finished before reading results.
        cudaDeviceSynchronize();
        
        // Print each of the N entries as 32 bytes in two-digit hex. hashes[i] is
        // dereferenced on the host here, so it must hold a host-readable address.
        for (int i = 0; i < N; i++) {
            printf("%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x\n", hashes[i][0], hashes[i][1], hashes[i][2], hashes[i][3], hashes[i][4], hashes[i][5], hashes[i][6], hashes[i][7], hashes[i][8], hashes[i][9], hashes[i][10], hashes[i][11], hashes[i][12], hashes[i][13], hashes[i][14], hashes[i][15],
                hashes[i][16], hashes[i][17], hashes[i][18], hashes[i][19], hashes[i][20], hashes[i][21], hashes[i][22], hashes[i][23], hashes[i][24], hashes[i][25], hashes[i][26], hashes[i][27], hashes[i][28], hashes[i][29], hashes[i][30], hashes[i][31]);
    }// printing here doesn't work: this is where the host read error is reported

This seems to be correct, but when I try to print the hashes on the host, I get a read error. Why?


Solution

  • The memory allocation you are using to hold the hashes is incorrect. To have an array of pointers to the memory for each hash, you would require memory allocated both for the array of pointers, and for the hashes themselves, so something like:

    // Two managed allocations are needed: a table of N row pointers, and the
    // N * 32 bytes of hash storage those pointers refer to. Both are
    // addressable from host and device because they come from
    // cudaMallocManaged. (Return codes are not checked here for brevity; real
    // code should test every cudaError_t.)
    unsigned char** hashes;
    unsigned char* buff;

    cudaMallocManaged(&hashes, N * sizeof(unsigned char*));
    cudaMallocManaged(&buff, N * (32 * sizeof(unsigned char)));
    cudaMemset(buff, 0, N * (32 * sizeof(unsigned char)));

    // Point each table entry at its own 32-byte slice of the contiguous buffer.
    for (int i = 0; i < N; i++) hashes[i] = &buff[i * 32];

    // The kernel must then write the digest bytes *through* dhashes[idx]
    // (e.g. pass dhashes[idx] as the output buffer, or copy 32 bytes into it).
    // Assigning `dhashes[idx] = sha;` would overwrite the pointer set above
    // and leave the managed buffer untouched.
    // Release both allocations with cudaFree(hashes) and cudaFree(buff) when done.
    

    [ Disclaimer: written in browser, never compiled or tested, use at own risk ]