I'm writing a program which is used to process the signal of GPS using CUDA.So I need to use fft,but an error occurred .
CUDA error at F:/clouddrive kingsoft/acc/accfinal/accfinal/acc.cu:341 code=2(CUF
FT_ALLOC_FAILED) "cufftPlan1d(&plan, new_size, CUFFT_C2C, 1)"
and the code is here.
double fft_Ifft_Sum(Complex *h_signal,Complex *h_filter_kernel,double* list,bool firstEnter)
{
double max=0;
int new_size=samplesPerCode;
int mem_size = sizeof(Complex) * new_size;
// Allocate device memory for signal
Complex *d_signal;
checkCudaErrors(cudaMalloc((void **)&d_signal, mem_size));
// Copy host memory to device
checkCudaErrors(cudaMemcpy(d_signal, h_signal, mem_size,cudaMemcpyHostToDevice));
// Allocate device memory for filter kernel
if(firstEnter)
{
checkCudaErrors(cudaFree(d_filter_kernel));
checkCudaErrors(cudaMalloc((void **)&d_filter_kernel, mem_size));
checkCudaErrors(cudaMemcpy(d_filter_kernel, h_filter_kernel, mem_size,
cudaMemcpyHostToDevice));
}
// CUFFT plan
cufftHandle plan;
checkCudaErrors(cufftPlan1d(&plan, new_size, CUFFT_C2C, 1));
// Transform signal and kernel
checkCudaErrors(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_FORWARD));
ComplexPointwiseMulAndScale_p<<<1024, 1024>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size);
// Check if kernel execution generated and error
getLastCudaError("Kernel execution failed [ ComplexPointwiseMulAndScale ]");
// Transform signal back
//printf("Transforming signal back cufftExecC2C\n");
checkCudaErrors(cufftExecC2C(plan, (cufftComplex *)d_signal, (cufftComplex *)d_signal, CUFFT_INVERSE));
cudaThreadSynchronize();
// Copy device memory to host
Complex *h_convolved_signal = h_signal;
checkCudaErrors(cudaMemcpy(h_convolved_signal, d_signal, mem_size,
cudaMemcpyDeviceToHost));
// list=new double[new_size];
for(int i=0;i<new_size;i++)
{
list[i]=h_convolved_signal[i].x*h_convolved_signal[i].x+h_convolved_signal[i].y*h_convolved_signal[i].y;
if(list[i]>max)max=list[i];
}
return max;
//Destroy CUFFT context
checkCudaErrors(cufftDestroy(plan));
// cleanup memory
free(h_signal);
free(h_filter_kernel);
checkCudaErrors(cudaFree(d_signal));
checkCudaErrors(cudaFree(d_filter_kernel));
cudaDeviceReset();
}
and this function will be called in main for about 1900 times and it can run smoothly until about the 1440 times ,the error will occur here.I cannot figure out why. thanks.
You are allocating device memory for d_signal
every time you enter the function, but never freeing it. You have a return
statement in your function prior to any of the free
or destroy operations, so this looks like a problem to me if you are calling this function repeatedly.
I would think the compiler would be spitting out a warning about unreachable code, also, based on what you have shown.