With my signal generated by the function 2*cos(5 * i) + 0.7*sin(3 * i),
I expect a complex array containing the amplitudes at each frequency:
...
"3 || cos: 0.0000 || sin:0.7000"
...
"5 || cos: 2.0000 || sin:0.0000"
...
Or something similar. But I get a completely different result. Please advise why, and where I am going wrong.
Here is my code:
#include <cuda_runtime.h>
#include <cufft.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Host-side name for one complex sample. The code below stores the signal in .x
// and zero in .y, and prints .x under "cos" and .y under "sin".
typedef float2 Complex;
// Transform configuration. These are ordinary preprocessor macros, so they are
// visible to every line that follows, independent of any function scope.
// main() passes NX and BATCH to cufftPlan1d and uses them to size its buffers.
#define NX 256
#define BATCH 1

// Empty placeholder: takes no arguments and performs no work.
void excufft()
{
}
// Builds an NX-sample test signal on the host, runs an in-place forward
// complex-to-complex cuFFT on the device, and prints bins 0 .. NX/2.
// Returns 0 on success and 1 as soon as one of the checked calls fails.
//
// NOTE(review): this listing is kept exactly as posted so that it still
// reproduces the output quoted after it; the defects are flagged inline.
int main()
{
cufftHandle plan;
cufftComplex *data;
// Device buffer for NX*BATCH complex samples; the FFT below runs in place on it.
// The return value of cudaMalloc is discarded and the failure is picked up
// through cudaGetLastError() instead.
cudaMalloc((void**)&data, sizeof(cufftComplex)*NX*BATCH);
if (cudaGetLastError() != cudaSuccess) {
fprintf(stderr, "Cuda error: Failed to allocate\n");
return 1;
}
// Host staging buffer for NX samples. NOTE(review): the malloc result is not checked.
Complex* h_signal = (Complex*)malloc(sizeof(Complex) * NX);
// Real part carries the test signal, imaginary part is zero.
// NOTE(review): 5 * i and 2*i are phase angles in radians per sample, not FFT
// bin numbers; a tone only falls exactly on bin k when its per-sample angular
// step is 2*pi*k/NX.
for (unsigned int i = 0; i < NX; ++i) {
h_signal[i].x = 0.3*cos(5 * i)+0.7*sin(2*i);
h_signal[i].y = 0;
}
// NOTE(review): the third argument of cudaMemcpy is a size in BYTES. Passing NX
// (256) copies only 256 bytes, i.e. the first 32 samples if float2 is 8 bytes;
// sizeof(Complex) * NX is presumably what was intended.
// NOTE(review): cudaMemcpy returns a cudaError_t, yet it is compared against
// CUFFT_SUCCESS and reported as a "CUFFT error"; the comparison only works
// if both success codes have the same numeric value - confirm.
if (cudaMemcpy(data, h_signal, NX, cudaMemcpyHostToDevice) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: cudaMemcpy host to device failed");
return 1;
}
// One-dimensional complex-to-complex plan of length NX with BATCH transforms.
if (cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: Plan creation failed");
return 1;
}
// Forward transform; input and output pointers are the same buffer (in place).
if (cufftExecC2C(plan, data, data, CUFFT_FORWARD) != CUFFT_SUCCESS) {
fprintf(stderr, "CUFFT error: ExecC2C Forward failed");
return 1;
}
// Wait for the device so that execution errors are reported here.
if (cudaDeviceSynchronize() != cudaSuccess) {
fprintf(stderr, "Cuda error: Failed to synchronize\n");
return 1;
}
// h_convolved_signal is an alias of h_signal: the input buffer is reused for
// the result and cleared first, so elements not overwritten by the copy read 0.
Complex* h_convolved_signal = h_signal;
for (unsigned int i = 0; i < NX; ++i) {
h_convolved_signal[i].x = 0;
h_convolved_signal[i].y = 0;
}
// NOTE(review): same byte-count problem as the upload - only NX bytes come
// back, which matches the zeros printed from index 32 onward. The return value
// of this copy is also not checked.
cudaMemcpy(h_convolved_signal, data, NX, cudaMemcpyDeviceToHost);
// Print bins 0 .. NX/2 inclusive, unscaled: .x under "cos", .y under "sin".
for (int i = 0; i<NX / 2 + 1; i++)
printf("%d || cos: %2.4f || sin: %2.4f\n", i, h_convolved_signal[i].x, h_convolved_signal[i].y);
// NOTE(review): h_signal is never freed, and the early "return 1" paths above
// skip cufftDestroy/cudaFree as well.
cufftDestroy(plan);
cudaFree(data);
return 0;
}
And here is the resulting output:
0 || cos: 0.0669 || sin: 0.0000
1 || cos: 0.1567 || sin: 0.2224
2 || cos: 0.3738 || sin: 0.3182
3 || cos: 0.5919 || sin: 0.2345
4 || cos: 0.6839 || sin: 0.0229
5 || cos: 0.5961 || sin: -0.1905
6 || cos: 0.3796 || sin: -0.2779
7 || cos: 0.1607 || sin: -0.1850
8 || cos: 0.0676 || sin: 0.0380
9 || cos: 0.1558 || sin: 0.2642
10 || cos: 0.3750 || sin: 0.3647
11 || cos: 0.5981 || sin: 0.2832
12 || cos: 0.6951 || sin: 0.0697
13 || cos: 0.6089 || sin: -0.1481
14 || cos: 0.3894 || sin: -0.2387
15 || cos: 0.1653 || sin: -0.1442
16 || cos: 0.0695 || sin: 0.0852
17 || cos: 0.1605 || sin: 0.3194
18 || cos: 0.3877 || sin: 0.4244
19 || cos: 0.6194 || sin: 0.3407
20 || cos: 0.7197 || sin: 0.1198
21 || cos: 0.6285 || sin: -0.1045
22 || cos: 0.3987 || sin: -0.1942
23 || cos: 0.1664 || sin: -0.0889
24 || cos: 0.0719 || sin: 0.1572
25 || cos: 0.1758 || sin: 0.4046
26 || cos: 0.4215 || sin: 0.5107
27 || cos: 0.6657 || sin: 0.4147
28 || cos: 0.7627 || sin: 0.1765
29 || cos: 0.6517 || sin: -0.0556
30 || cos: 0.3970 || sin: -0.1322
31 || cos: 0.1526 || sin: 0.0068
32 || cos: 0.0000 || sin: 0.0000
33 || cos: 0.0000 || sin: 0.0000
34 || cos: 0.0000 || sin: 0.0000
...
It looks like I should do some additional signal preparation, or some post-processing.
The size passed to cudaMemcpy must be NX*sizeof(Complex)
in each case, instead of just NX. Using plain integers for the frequency and amplitude is also wrong. I found this signal form in my experiments:
ampl * 0.8 * sin(freq * 24.56 * i / 1000)
Then divide the resulting frequency-domain values by 100.
By the way, the MATLAB fft and cuFFT are used differently here: the MATLAB fft is fed real numbers, whereas this cuFFT plan (C2C) operates on complex values.
And here is the corrected code:
#include <cuda_runtime.h>
#include <cufft.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Host-side name for one complex sample. The code below stores the signal in .x
// and zero in .y, and prints .x under "cos" and .y under "sin".
typedef float2 Complex;
// Transform configuration. These are ordinary preprocessor macros, so they are
// visible to every line that follows, independent of any function scope.
// main() passes NX and BATCH to cufftPlan1d and uses them to size its buffers.
#define NX 256
#define BATCH 1

// Empty placeholder: takes no arguments and performs no work.
void excufft()
{
}
// Generates a two-tone test signal, runs an in-place forward complex-to-complex
// cuFFT on it and prints the first NX/2 bins of the first transform, scaled by
// 1/divfreq.
//
// Returns 0 on success and 1 if any allocation, copy, plan or execution step
// fails. Every resource acquired so far is released on all exit paths.
int main()
{
    // Amplitude scale applied to both tones.
    const float mulampl = 0.8f;
    // mulfreq / 1000 = 0.02456 is close to 2*pi/NX (about 0.024544 for NX = 256),
    // so a multiplier k inside sin/cos puts the tone near FFT bin k.
    const float mulfreq = 24.56f;
    // Empirical output scale taken from the original post.
    // NOTE(review): an unnormalized forward DFT of A*cos(...) sitting exactly on
    // a bin has magnitude A*NX/2, so 2/NX would be the usual amplitude
    // normalization - confirm which scaling is wanted.
    const float divfreq = 100.0f;

    const unsigned int total = NX * BATCH;                // samples over all batches
    const size_t bytes = sizeof(cufftComplex) * total;    // cudaMemcpy sizes are in bytes

    cufftHandle plan;
    bool planCreated = false;
    cufftComplex *data = NULL;   // device buffer, transformed in place
    Complex *h_signal = NULL;    // host buffer, reused for the result
    int status = 1;

    // Single-pass block: "break" jumps to the shared cleanup below.
    do {
        cudaError_t cudaStatus = cudaMalloc((void**)&data, bytes);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "Cuda error: Failed to allocate (%s)\n", cudaGetErrorString(cudaStatus));
            break;
        }

        // Sized like the device buffer so the copies below never run past the end.
        h_signal = (Complex*)malloc(bytes);
        if (h_signal == NULL) {
            fprintf(stderr, "Host error: Failed to allocate signal buffer\n");
            break;
        }

        // Every batch gets the same signal: real part = two tones, imaginary part = 0.
        for (unsigned int n = 0; n < total; n++) {
            const unsigned int i = n % NX;
            h_signal[n].x = 17 * mulampl * sin(100 * mulfreq * i / 1000) + 5 * mulampl * cos(70 * mulfreq * i / 1000);
            h_signal[n].y = 0;
        }

        // cudaMemcpy reports a cudaError_t, so it is checked against cudaSuccess.
        cudaStatus = cudaMemcpy(data, h_signal, bytes, cudaMemcpyHostToDevice);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "Cuda error: cudaMemcpy host to device failed (%s)\n", cudaGetErrorString(cudaStatus));
            break;
        }

        cufftResult fftStatus = cufftPlan1d(&plan, NX, CUFFT_C2C, BATCH);
        if (fftStatus != CUFFT_SUCCESS) {
            fprintf(stderr, "CUFFT error: Plan creation failed (code %d)\n", (int)fftStatus);
            break;
        }
        planCreated = true;

        fftStatus = cufftExecC2C(plan, data, data, CUFFT_FORWARD);
        if (fftStatus != CUFFT_SUCCESS) {
            fprintf(stderr, "CUFFT error: ExecC2C Forward failed (code %d)\n", (int)fftStatus);
            break;
        }

        // Surface asynchronous execution errors before reading the result.
        cudaStatus = cudaDeviceSynchronize();
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "Cuda error: Failed to synchronize (%s)\n", cudaGetErrorString(cudaStatus));
            break;
        }

        // The copy overwrites the whole host buffer, so no zero-fill is needed first.
        cudaStatus = cudaMemcpy(h_signal, data, bytes, cudaMemcpyDeviceToHost);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "Cuda error: cudaMemcpy device to host failed (%s)\n", cudaGetErrorString(cudaStatus));
            break;
        }

        // Bins 0 .. NX/2 - 1 of the first transform: .x under "cos", .y under "sin".
        for (int i = 0; i < NX / 2; i++)
            printf("%d || cos: %2.4f || sin: %2.4f\n", i, h_signal[i].x / divfreq, h_signal[i].y / divfreq);

        status = 0;
    } while (0);

    // Shared cleanup; cudaFree and free both accept a null pointer.
    if (planCreated)
        cufftDestroy(plan);
    cudaFree(data);
    free(h_signal);
    return status;
}
output:
0 || cos: -0.0197 || sin: 0.0000
1 || cos: -0.0197 || sin: -0.0004
2 || cos: -0.0197 || sin: -0.0007
3 || cos: -0.0197 || sin: -0.0011
4 || cos: -0.0196 || sin: -0.0014
5 || cos: -0.0196 || sin: -0.0018
6 || cos: -0.0196 || sin: -0.0022
7 || cos: -0.0196 || sin: -0.0025
8 || cos: -0.0196 || sin: -0.0029
9 || cos: -0.0196 || sin: -0.0033
10 || cos: -0.0195 || sin: -0.0036
11 || cos: -0.0195 || sin: -0.0040
12 || cos: -0.0194 || sin: -0.0044
13 || cos: -0.0194 || sin: -0.0048
14 || cos: -0.0193 || sin: -0.0052
15 || cos: -0.0193 || sin: -0.0055
16 || cos: -0.0192 || sin: -0.0059
17 || cos: -0.0192 || sin: -0.0063
18 || cos: -0.0191 || sin: -0.0067
19 || cos: -0.0190 || sin: -0.0071
20 || cos: -0.0189 || sin: -0.0075
21 || cos: -0.0189 || sin: -0.0078
22 || cos: -0.0188 || sin: -0.0082
23 || cos: -0.0187 || sin: -0.0087
24 || cos: -0.0186 || sin: -0.0091
25 || cos: -0.0185 || sin: -0.0095
26 || cos: -0.0184 || sin: -0.0099
27 || cos: -0.0183 || sin: -0.0103
28 || cos: -0.0181 || sin: -0.0107
29 || cos: -0.0180 || sin: -0.0111
30 || cos: -0.0179 || sin: -0.0116
31 || cos: -0.0177 || sin: -0.0120
32 || cos: -0.0175 || sin: -0.0125
33 || cos: -0.0174 || sin: -0.0129
34 || cos: -0.0172 || sin: -0.0134
35 || cos: -0.0170 || sin: -0.0138
36 || cos: -0.0168 || sin: -0.0143
37 || cos: -0.0166 || sin: -0.0148
38 || cos: -0.0164 || sin: -0.0153
39 || cos: -0.0161 || sin: -0.0157
40 || cos: -0.0158 || sin: -0.0162
41 || cos: -0.0156 || sin: -0.0167
42 || cos: -0.0153 || sin: -0.0173
43 || cos: -0.0149 || sin: -0.0178
44 || cos: -0.0145 || sin: -0.0183
45 || cos: -0.0141 || sin: -0.0189
46 || cos: -0.0137 || sin: -0.0194
47 || cos: -0.0133 || sin: -0.0200
48 || cos: -0.0128 || sin: -0.0205
49 || cos: -0.0123 || sin: -0.0211
50 || cos: -0.0116 || sin: -0.0217
51 || cos: -0.0110 || sin: -0.0223
52 || cos: -0.0102 || sin: -0.0229
53 || cos: -0.0094 || sin: -0.0236
54 || cos: -0.0085 || sin: -0.0242
55 || cos: -0.0075 || sin: -0.0248
56 || cos: -0.0063 || sin: -0.0255
57 || cos: -0.0049 || sin: -0.0261
58 || cos: -0.0034 || sin: -0.0268
59 || cos: -0.0015 || sin: -0.0274
60 || cos: 0.0007 || sin: -0.0280
61 || cos: 0.0034 || sin: -0.0286
62 || cos: 0.0067 || sin: -0.0292
63 || cos: 0.0110 || sin: -0.0296
64 || cos: 0.0166 || sin: -0.0299
65 || cos: 0.0244 || sin: -0.0299
66 || cos: 0.0361 || sin: -0.0294
67 || cos: 0.0552 || sin: -0.0279
68 || cos: 0.0931 || sin: -0.0237
69 || cos: 0.2029 || sin: -0.0089
70 || cos: 5.0262 || sin: 0.6993
71 || cos: -0.2672 || sin: -0.0812
72 || cos: -0.1410 || sin: -0.0643
73 || cos: -0.1000 || sin: -0.0601
74 || cos: -0.0797 || sin: -0.0590
75 || cos: -0.0674 || sin: -0.0592
76 || cos: -0.0591 || sin: -0.0602
77 || cos: -0.0531 || sin: -0.0617
78 || cos: -0.0484 || sin: -0.0636
79 || cos: -0.0447 || sin: -0.0659
80 || cos: -0.0415 || sin: -0.0684
81 || cos: -0.0388 || sin: -0.0713
82 || cos: -0.0364 || sin: -0.0747
83 || cos: -0.0342 || sin: -0.0784
84 || cos: -0.0321 || sin: -0.0827
85 || cos: -0.0301 || sin: -0.0875
86 || cos: -0.0281 || sin: -0.0930
87 || cos: -0.0260 || sin: -0.0993
88 || cos: -0.0238 || sin: -0.1067
89 || cos: -0.0214 || sin: -0.1153
90 || cos: -0.0187 || sin: -0.1257
91 || cos: -0.0156 || sin: -0.1383
92 || cos: -0.0120 || sin: -0.1540
93 || cos: -0.0074 || sin: -0.1740
94 || cos: -0.0015 || sin: -0.2005
95 || cos: 0.0065 || sin: -0.2374
96 || cos: 0.0183 || sin: -0.2923
97 || cos: 0.0377 || sin: -0.3828
98 || cos: 0.0754 || sin: -0.5605
99 || cos: 0.1835 || sin: -1.0706
100 || cos: 3.5418 || sin: -16.9244
101 || cos: -0.2941 || sin: 1.1851
102 || cos: -0.1622 || sin: 0.5624
103 || cos: -0.1202 || sin: 0.3639
104 || cos: -0.0994 || sin: 0.2660
105 || cos: -0.0871 || sin: 0.2077
106 || cos: -0.0789 || sin: 0.1688
107 || cos: -0.0731 || sin: 0.1409
108 || cos: -0.0688 || sin: 0.1199
109 || cos: -0.0654 || sin: 0.1035
110 || cos: -0.0627 || sin: 0.0902
111 || cos: -0.0605 || sin: 0.0793
112 || cos: -0.0587 || sin: 0.0700
113 || cos: -0.0572 || sin: 0.0620
114 || cos: -0.0560 || sin: 0.0551
115 || cos: -0.0549 || sin: 0.0490
116 || cos: -0.0540 || sin: 0.0435
117 || cos: -0.0532 || sin: 0.0385
118 || cos: -0.0525 || sin: 0.0339
119 || cos: -0.0520 || sin: 0.0297
120 || cos: -0.0515 || sin: 0.0258
121 || cos: -0.0510 || sin: 0.0221
122 || cos: -0.0507 || sin: 0.0186
123 || cos: -0.0504 || sin: 0.0153
124 || cos: -0.0502 || sin: 0.0121
125 || cos: -0.0500 || sin: 0.0090
126 || cos: -0.0499 || sin: 0.0059
127 || cos: -0.0498 || sin: 0.0029
Thanks all!