FastLED 3.9.15
Loading...
Searching...
No Matches
kiss_fft.cpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
3 * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
4 *
5 * SPDX-License-Identifier: BSD-3-Clause
6 * See COPYING file for more information.
7 */
8
9#include "_kiss_fft_guts.h"
10/* The guts header contains all the multiplication and addition macros that are defined for
11 fixed or floating point complex numbers. It also delares the kf_ internal functions.
12 */
13
14static void kf_bfly2(
15 kiss_fft_cpx * Fout,
16 const size_t fstride,
17 const kiss_fft_cfg st,
18 int m
19 )
20{
21 kiss_fft_cpx * Fout2;
22 kiss_fft_cpx * tw1 = st->twiddles;
24 Fout2 = Fout + m;
25 do{
26 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
27
28 C_MUL (t, *Fout2 , *tw1);
29 tw1 += fstride;
30 C_SUB( *Fout2 , *Fout , t );
31 C_ADDTO( *Fout , t );
32 ++Fout2;
33 ++Fout;
34 }while (--m);
35}
36
37static void kf_bfly4(
38 kiss_fft_cpx * Fout,
39 const size_t fstride,
40 const kiss_fft_cfg st,
41 const size_t m
42 )
43{
44 kiss_fft_cpx *tw1,*tw2,*tw3;
45 kiss_fft_cpx scratch[6];
46 size_t k=m;
47 const size_t m2=2*m;
48 const size_t m3=3*m;
49
50
51 tw3 = tw2 = tw1 = st->twiddles;
52
53 do {
54 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
55
56 C_MUL(scratch[0],Fout[m] , *tw1 );
57 C_MUL(scratch[1],Fout[m2] , *tw2 );
58 C_MUL(scratch[2],Fout[m3] , *tw3 );
59
60 C_SUB( scratch[5] , *Fout, scratch[1] );
61 C_ADDTO(*Fout, scratch[1]);
62 C_ADD( scratch[3] , scratch[0] , scratch[2] );
63 C_SUB( scratch[4] , scratch[0] , scratch[2] );
64 C_SUB( Fout[m2], *Fout, scratch[3] );
65 tw1 += fstride;
66 tw2 += fstride*2;
67 tw3 += fstride*3;
68 C_ADDTO( *Fout , scratch[3] );
69
70 if(st->inverse) {
71 Fout[m].r = scratch[5].r - scratch[4].i;
72 Fout[m].i = scratch[5].i + scratch[4].r;
73 Fout[m3].r = scratch[5].r + scratch[4].i;
74 Fout[m3].i = scratch[5].i - scratch[4].r;
75 }else{
76 Fout[m].r = scratch[5].r + scratch[4].i;
77 Fout[m].i = scratch[5].i - scratch[4].r;
78 Fout[m3].r = scratch[5].r - scratch[4].i;
79 Fout[m3].i = scratch[5].i + scratch[4].r;
80 }
81 ++Fout;
82 }while(--k);
83}
84
85static void kf_bfly3(
86 kiss_fft_cpx * Fout,
87 const size_t fstride,
88 const kiss_fft_cfg st,
89 size_t m
90 )
91{
92 size_t k=m;
93 const size_t m2 = 2*m;
94 kiss_fft_cpx *tw1,*tw2;
95 kiss_fft_cpx scratch[5];
96 kiss_fft_cpx epi3;
97 epi3 = st->twiddles[fstride*m];
98
99 tw1=tw2=st->twiddles;
100
101 do{
102 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
103
104 C_MUL(scratch[1],Fout[m] , *tw1);
105 C_MUL(scratch[2],Fout[m2] , *tw2);
106
107 C_ADD(scratch[3],scratch[1],scratch[2]);
108 C_SUB(scratch[0],scratch[1],scratch[2]);
109 tw1 += fstride;
110 tw2 += fstride*2;
111
112 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
113 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
114
115 C_MULBYSCALAR( scratch[0] , epi3.i );
116
117 C_ADDTO(*Fout,scratch[3]);
118
119 Fout[m2].r = Fout[m].r + scratch[0].i;
120 Fout[m2].i = Fout[m].i - scratch[0].r;
121
122 Fout[m].r -= scratch[0].i;
123 Fout[m].i += scratch[0].r;
124
125 ++Fout;
126 }while(--k);
127}
128
129static void kf_bfly5(
130 kiss_fft_cpx * Fout,
131 const size_t fstride,
132 const kiss_fft_cfg st,
133 int m
134 )
135{
136 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
137 int u;
138 kiss_fft_cpx scratch[13];
139 kiss_fft_cpx * twiddles = st->twiddles;
140 kiss_fft_cpx *tw;
141 kiss_fft_cpx ya,yb;
142 ya = twiddles[fstride*m];
143 yb = twiddles[fstride*2*m];
144
145 Fout0=Fout;
146 Fout1=Fout0+m;
147 Fout2=Fout0+2*m;
148 Fout3=Fout0+3*m;
149 Fout4=Fout0+4*m;
150
151 tw=st->twiddles;
152 for ( u=0; u<m; ++u ) {
153 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
154 scratch[0] = *Fout0;
155
156 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
157 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
158 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
159 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
160
161 C_ADD( scratch[7],scratch[1],scratch[4]);
162 C_SUB( scratch[10],scratch[1],scratch[4]);
163 C_ADD( scratch[8],scratch[2],scratch[3]);
164 C_SUB( scratch[9],scratch[2],scratch[3]);
165
166 Fout0->r += scratch[7].r + scratch[8].r;
167 Fout0->i += scratch[7].i + scratch[8].i;
168
169 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
170 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
171
172 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
173 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
174
175 C_SUB(*Fout1,scratch[5],scratch[6]);
176 C_ADD(*Fout4,scratch[5],scratch[6]);
177
178 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
179 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
180 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
181 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
182
183 C_ADD(*Fout2,scratch[11],scratch[12]);
184 C_SUB(*Fout3,scratch[11],scratch[12]);
185
186 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
187 }
188}
189
190/* perform the butterfly for one stage of a mixed radix FFT */
191static void kf_bfly_generic(
192 kiss_fft_cpx * Fout,
193 const size_t fstride,
194 const kiss_fft_cfg st,
195 int m,
196 int p
197 )
198{
199 int u,k,q1,q;
200 kiss_fft_cpx * twiddles = st->twiddles;
202 int Norig = st->nfft;
203
205
206 for ( u=0; u<m; ++u ) {
207 k=u;
208 for ( q1=0 ; q1<p ; ++q1 ) {
209 scratch[q1] = Fout[ k ];
210 C_FIXDIV(scratch[q1],p);
211 k += m;
212 }
213
214 k=u;
215 for ( q1=0 ; q1<p ; ++q1 ) {
216 int twidx=0;
217 Fout[ k ] = scratch[0];
218 for (q=1;q<p;++q ) {
219 twidx += fstride * k;
220 if (twidx>=Norig) twidx-=Norig;
221 C_MUL(t,scratch[q] , twiddles[twidx] );
222 C_ADDTO( Fout[ k ] ,t);
223 }
224 k += m;
225 }
226 }
227 KISS_FFT_TMP_FREE(scratch);
228}
229
230static
232 kiss_fft_cpx * Fout,
233 const kiss_fft_cpx * f,
234 const size_t fstride,
235 int in_stride,
236 int * factors,
237 const kiss_fft_cfg st
238 )
239{
240 kiss_fft_cpx * Fout_beg=Fout;
241 const int p=*factors++; /* the radix */
242 const int m=*factors++; /* stage's fft length/p */
243 const kiss_fft_cpx * Fout_end = Fout + p*m;
244
245#ifdef _OPENMP
246 // use openmp extensions at the
247 // top-level (not recursive)
248 if (fstride==1 && p<=5)
249 {
250 int k;
251
252 // execute the p different work units in different threads
253# pragma omp parallel for
254 for (k=0;k<p;++k)
255 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
256 // all threads have joined by this point
257
258 switch (p) {
259 case 2: kf_bfly2(Fout,fstride,st,m); break;
260 case 3: kf_bfly3(Fout,fstride,st,m); break;
261 case 4: kf_bfly4(Fout,fstride,st,m); break;
262 case 5: kf_bfly5(Fout,fstride,st,m); break;
263 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
264 }
265 return;
266 }
267#endif
268
269 if (m==1) {
270 do{
271 *Fout = *f;
272 f += fstride*in_stride;
273 }while(++Fout != Fout_end );
274 }else{
275 do{
276 // recursive call:
277 // DFT of size m*p performed by doing
278 // p instances of smaller DFTs of size m,
279 // each one takes every mth sample
280 // so0, s1, s2 become
281 // so, sm, s2m, s3m, ...
282 // s1, s(1+m), s(1+2m), ...
283 // s2, s(2+m), s(2+2m), ...
284 // etc.
285 kf_work( Fout , f, fstride*p, in_stride, factors,st);
286 f += fstride*in_stride;
287 }while( (Fout += m) != Fout_end );
288 }
289
290 Fout=Fout_beg;
291
292 // recombine the p smaller DFTs
293 switch (p) {
294 case 2: kf_bfly2(Fout,fstride,st,m); break;
295 case 3: kf_bfly3(Fout,fstride,st,m); break;
296 case 4: kf_bfly4(Fout,fstride,st,m); break;
297 case 5: kf_bfly5(Fout,fstride,st,m); break;
298 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
299 }
300}
301
302/* facbuf is populated by p1,m1,p2,m2, ...
303 where
304 p[i] * m[i] = m[i-1]
305 m0 = n */
306static
307void kf_factor(int n,int * facbuf)
308{
309 int p=4;
310 double floor_sqrt;
311 floor_sqrt = floor( sqrt((double)n) );
312
313 /*factor out powers of 4, powers of 2, then any remaining primes */
314 do {
315 while (n % p) {
316 switch (p) {
317 case 4: p = 2; break;
318 case 2: p = 3; break;
319 default: p += 2; break;
320 }
321 if (p > floor_sqrt)
322 p = n; /* no more factors, skip to end */
323 }
324 n /= p;
325 *facbuf++ = p;
326 *facbuf++ = n;
327 } while (n > 1);
328}
329
330/*
331 *
332 * User-callable function to allocate all necessary storage space for the fft.
333 *
334 * The return value is a contiguous block of memory, cast to a kiss_fft_cfg.
335 *
336 * As such, the first cfg->sizeof_cfg bytes contain the configuration information.
337 * In a single-threaded program, subsequent calls to kiss_fft do not access this memory.
338 *
339 * The next section holds the twiddle factors for each stage.
340 *
341 * Note that the cfg is not guaranteed to be valid over time -- it should not be
342 * shared between threads and should not be used after a subsequent call to kiss_fft_alloc.
343 *
344 * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
345 * The returned value should be free()d when done to avoid memory leaks.
346 *
347 * The state can be placed in a user-supplied buffer 'mem':
348 * If mem==NULL, then a private internal buffer is allocated.
349 * If mem!=NULL, then mem must have size bytes. *lenmem gets set to the size of the cfg.
350 * On return, the first *lenmem words of mem will hold the configuration information.
351 * The remainder is scratchpad memory, safe to ignore or overwrite.
352 *
353 */
354kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
355{
356 kiss_fft_cfg st=NULL;
357 size_t memneeded = sizeof(struct kiss_fft_state)
358 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
359
360 if ( lenmem==NULL ) {
361 st = (kiss_fft_cfg) KISS_FFT_MALLOC( memneeded );
362 }else{
363 if (mem != NULL && *lenmem >= memneeded)
364 st = (kiss_fft_cfg)mem;
365 *lenmem = memneeded;
366 }
367 if (st) {
368 int i;
369 st->nfft=nfft;
370 st->inverse = inverse_fft;
371
372 for (i=0;i<nfft;++i) {
373 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
374 double phase = -2*pi*i / nfft;
375 if (st->inverse)
376 phase *= -1;
377 kf_cexp(st->twiddles+i, phase );
378 }
379
381 }
382 return st;
383}
384
385
386void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
387{
388 if (fin == fout) {
389 //NOTE: this is not really an in-place FFT algorithm.
390 //It just performs an out-of-place FFT into a temp buffer
392 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
393 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
394 KISS_FFT_TMP_FREE(tmpbuf);
395 }else{
396 kf_work( fout, fin, 1, in_stride, st->factors,st );
397 }
398}
399
401{
402 kiss_fft_stride(cfg,fin,fout,1);
403}
404
405
407{
408 // nothing needed any more
409}
410
412{
413 while(1) {
414 int m=n;
415 while ( (m%2) == 0 ) m/=2;
416 while ( (m%3) == 0 ) m/=3;
417 while ( (m%5) == 0 ) m/=5;
418 if (m<=1)
419 break; /* n is completely factorable by twos, threes, and fives */
420 n++;
421 }
422 return n;
423}
#define C_FIXDIV(c, div)
#define HALF_OF(x)
#define C_ADD(res, a, b)
#define C_ADDTO(res, a)
#define C_SUB(res, a, b)
#define S_MUL(a, b)
#define C_MULBYSCALAR(c, s)
#define C_MUL(m, a, b)
#define KISS_FFT_TMP_FREE(ptr)
#define KISS_FFT_TMP_ALLOC(nbytes)
#define kf_cexp(x, phase)
int factors[2 *MAXFACTORS]
kiss_fft_cpx twiddles[1]
static uint32_t t
Definition Luminova.h:54
static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const size_t m)
Definition kiss_fft.cpp:37
static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m)
Definition kiss_fft.cpp:14
int kiss_fft_next_fast_size(int n)
Definition kiss_fft.cpp:411
static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m, int p)
Definition kiss_fft.cpp:191
static void kf_factor(int n, int *facbuf)
Definition kiss_fft.cpp:307
static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, size_t m)
Definition kiss_fft.cpp:85
static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f, const size_t fstride, int in_stride, int *factors, const kiss_fft_cfg st)
Definition kiss_fft.cpp:231
void kiss_fft_cleanup(void)
Definition kiss_fft.cpp:406
void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout)
Definition kiss_fft.cpp:400
kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem)
Definition kiss_fft.cpp:354
void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, int in_stride)
Definition kiss_fft.cpp:386
static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m)
Definition kiss_fft.cpp:129
#define KISS_FFT_MALLOC
Definition kiss_fft.h:53
struct kiss_fft_state * kiss_fft_cfg
Definition kiss_fft.h:77
kiss_fft_scalar r
Definition kiss_fft.h:73
kiss_fft_scalar i
Definition kiss_fft.h:74