cudafftifft

CUDA Inverse FFT Bug


I have the following code that has bug when doing the inverse FFT. The forward FFT works as I printed the output and verified it. But the inverse does not seem to. Any ideas? Does it look like I have missed a concept?

Code - http://pastebin.com/iZYtdcqR

EDIT - I have essentially rewritten the code that comes with the CUDA toolkit samples. I am trying to perform a convolution using FFT but with a modified algorithm (DIF actually.)

EDIT2 - dding code to the question.

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <cuda_runtime.h>
#include <cufft.h>

typedef enum signaltype {REAL, COMPLEX} signal;

typedef float2 Complex;

void
printData(Complex *a, int size, char *msg) {

  if (msg == "") printf("\n");
  else printf("%s\n", msg);

  for (int i = 0; i < size; i++)
    printf("%f %f\n", a[i].x, a[i].y);
}

void
normData(Complex *a, int size, float norm) {

  for (int i = 0; i < size; i++) {
    a[i].x /= norm;
    a[i].y /= norm;
  }
}

void
randomFill(Complex *h_signal, int size, int flag) {

  // Real signal.
  if (flag == REAL) {
    for (int i = 0; i < size; i++) {
      h_signal[i].x = rand() / (float) RAND_MAX;
      h_signal[i].y = 0;
    }
  }
}

// FFT a signal that's on the _DEVICE_.
void
signalFFT(Complex *d_signal, int signal_size) {

  cufftHandle plan;
  if (cufftPlan1d(&plan, signal_size, CUFFT_C2C, 1) != CUFFT_SUCCESS) {
    printf("Failed to plan FFT\n");
    exit(0);
  }

  // Execute the plan.
  if (cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_FORWARD) != CUFFT_SUCCESS) {
    printf ("Failed Executing FFT\n");
    exit(0);
  }

}

void
signalIFFT(Complex *d_signal, int signal_size) {

  cufftHandle plan;
  if (cufftPlan1d(&plan, signal_size, CUFFT_C2C, 1) != CUFFT_SUCCESS) {
    printf("Failed to plan IFFT\n");
    exit(0);
  }

  // Execute the plan.
  if (cufftExecC2C(plan, (cufftComplex *) d_signal, (cufftComplex *) d_signal, CUFFT_INVERSE) != CUFFT_SUCCESS) {
    printf ("Failed Executing IFFT\n");
    exit(0);
  }

}

int main()
{

  Complex *h_signal, *d_signal1;

  int alloc_size, i;

  alloc_size = 16;

  // Kernel Block and Grid Size.
  const dim3 blockSize(16, 16, 1);
  const dim3 gridSize(alloc_size / 16 + 1, alloc_size / 16 + 1, 1);

  h_signal = (Complex *) malloc(sizeof(Complex) * alloc_size);

  cudaMalloc(&d_signal1, sizeof(Complex) * alloc_size);
  if (cudaGetLastError() != cudaSuccess){
    printf("Cuda error: Failed to allocate\n");
    exit(0);
  }
  //cudaMalloc(&d_signal2, sizeof(Complex) * alloc_size);

  // Add random data to signal.
  randomFill(h_signal, alloc_size, REAL);
  printData(h_signal, alloc_size, "Random H1");

  cudaMemcpy(d_signal1, h_signal, sizeof(Complex) * alloc_size, cudaMemcpyHostToDevice);

  signalFFT(d_signal1, alloc_size);

  signalIFFT(d_signal1, alloc_size);

  cudaDeviceSynchronize();

  cudaMemcpy(h_signal, d_signal1, sizeof(Complex) * alloc_size, cudaMemcpyDeviceToHost);

  printData(h_signal, alloc_size, "IFFT");

  return 0;
}

Solution

  • Suggestions for writing a good question:

    Another note:

    Now regarding your question, I ran your code and got results like this:

    Random H1
    0.840188 0.000000
    0.394383 0.000000
    0.783099 0.000000
    0.798440 0.000000
    0.911647 0.000000
    0.197551 0.000000
    0.335223 0.000000
    0.768230 0.000000
    0.277775 0.000000
    0.553970 0.000000
    0.477397 0.000000
    0.628871 0.000000
    0.364784 0.000000
    0.513401 0.000000
    0.952230 0.000000
    0.916195 0.000000
    IFFT
    13.443005 0.000000
    6.310127 -0.000000
    12.529589 0.000000
    12.775041 0.000000
    14.586359 -0.000000
    3.160823 0.000000
    5.363565 0.000000
    12.291674 -0.000000
    4.444397 -0.000000
    8.863521 0.000000
    7.638353 0.000000
    10.061934 -0.000000
    5.836554 0.000000
    8.214415 -0.000000
    15.235678 -0.000000
    14.659121 -0.000000
    

    The only thing that seems to be missing from your code is that you didn't divide the results by the length of the transform (16 in this case) to get back the original data (as suggested in the sample code here). When I do that, I get what I believe is expected results:

    Random H1
    0.840188 0.000000
    0.394383 0.000000
    0.783099 0.000000
    0.798440 0.000000
    0.911647 0.000000
    0.197551 0.000000
    0.335223 0.000000
    0.768230 0.000000
    0.277775 0.000000
    0.553970 0.000000
    0.477397 0.000000
    0.628871 0.000000
    0.364784 0.000000
    0.513401 0.000000
    0.952230 0.000000
    0.916195 0.000000
    IFFT
    0.840188 0.000000
    0.394383 -0.000000
    0.783099 0.000000
    0.798440 0.000000
    0.911647 -0.000000
    0.197551 0.000000
    0.335223 0.000000
    0.768230 -0.000000
    0.277775 -0.000000
    0.553970 0.000000
    0.477397 0.000000
    0.628871 -0.000000
    0.364785 0.000000
    0.513401 -0.000000
    0.952230 -0.000000
    0.916195 -0.000000
    

    By the way, kudos for providing a complete, compilable code sample.