I'm using the following code to test the CUDA NPP min-max function.
#include <string.h>
#include <fstream>
#include <iostream>
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string>
#include <math.h>
#include <assert.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuComplex.h"
#include <cufft.h>
#include <cuda_runtime.h>
#include <npp.h>
#define Nz 256
#define Ny 280
int main(int argc, char** argv) {
struct cudaDeviceProp p;
cudaGetDeviceProperties(&p, 0);
printf("Device Name: %s\n", p.name);
Npp32f* d_img;
cudaMalloc((void**)&d_img, Nz*Ny * sizeof(Npp32f));
nppsSet_32f(1.0f, d_img, Nz*Ny);
int BufferSize;
Npp32f Max;
Npp32f Min;
nppsMinMaxGetBufferSize_32f(Nz*Ny,&BufferSize);
Npp8u *pScratch;
cudaMalloc((void **)(&pScratch), BufferSize);
nppsMinMax_32f(d_img,Nz*Ny,&Min,&Max,pScratch);
printf("Max:%g, Min:%g\n", (float)Max, (float)Min);
cudaFree(d_img);
cudaFree(pScratch);
}
All elements in the device array are set to 1, but I get the following output.
Max:1.12104e-44, Min:0
Regarding to the NPP documentation, Min
and Max
must be stored on the device memory (or at least accessible from the device) and not on the host memory like in your code. On way to fix that is to allocate some memory to store the min/max values and then transfer the values back to the host to print them like in the documentation example.