FastLED 3.9.15
Loading...
Searching...
No Matches
kiss_fft.cpp.hpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
3 * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
4 *
5 * SPDX-License-Identifier: BSD-3-Clause
6 * See COPYING file for more information.
7 */
8
9#include "fl/stl/cstddef.h"
10#include "fl/stl/string.h"
11#include "fl/stl/cstring.h" // for fl::memset() and fl::memcpy()
12#include "fl/stl/noexcept.h"
13#include "_kiss_fft_guts.h"
14
15// Ensure NULL is defined (standard C macro)
16#ifndef NULL
17#define NULL 0
18#endif
19/* The guts header contains all the multiplication and addition macros that are defined for
20 fixed or floating point complex numbers. It also delares the kf_ internal functions.
21 */
22
23static void kf_bfly2(
24 kiss_fft_cpx * Fout,
25 const size_t fstride,
26 const kiss_fft_cfg st,
27 int m
29{
30 kiss_fft_cpx * Fout2;
31 kiss_fft_cpx * tw1 = st->twiddles;
33 Fout2 = Fout + m;
34 do{
35 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
36
37 C_MUL (t, *Fout2 , *tw1);
38 tw1 += fstride;
39 C_SUB( *Fout2 , *Fout , t );
40 C_ADDTO( *Fout , t );
41 ++Fout2;
42 ++Fout;
43 }while (--m);
44}
45
46static void kf_bfly4(
47 kiss_fft_cpx * Fout,
48 const size_t fstride,
49 const kiss_fft_cfg st,
50 const size_t m
52{
53 kiss_fft_cpx *tw1,*tw2,*tw3;
54 kiss_fft_cpx scratch[6];
55 size_t k=m;
56 const size_t m2=2*m;
57 const size_t m3=3*m;
58
59
60 tw3 = tw2 = tw1 = st->twiddles;
61
62 do {
63 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
64
65 C_MUL(scratch[0],Fout[m] , *tw1 );
66 C_MUL(scratch[1],Fout[m2] , *tw2 );
67 C_MUL(scratch[2],Fout[m3] , *tw3 );
68
69 C_SUB( scratch[5] , *Fout, scratch[1] );
70 C_ADDTO(*Fout, scratch[1]);
71 C_ADD( scratch[3] , scratch[0] , scratch[2] );
72 C_SUB( scratch[4] , scratch[0] , scratch[2] );
73 C_SUB( Fout[m2], *Fout, scratch[3] );
74 tw1 += fstride;
75 tw2 += fstride*2;
76 tw3 += fstride*3;
77 C_ADDTO( *Fout , scratch[3] );
78
79 if(st->inverse) {
80 Fout[m].r = scratch[5].r - scratch[4].i;
81 Fout[m].i = scratch[5].i + scratch[4].r;
82 Fout[m3].r = scratch[5].r + scratch[4].i;
83 Fout[m3].i = scratch[5].i - scratch[4].r;
84 }else{
85 Fout[m].r = scratch[5].r + scratch[4].i;
86 Fout[m].i = scratch[5].i - scratch[4].r;
87 Fout[m3].r = scratch[5].r - scratch[4].i;
88 Fout[m3].i = scratch[5].i + scratch[4].r;
89 }
90 ++Fout;
91 }while(--k);
92}
93
94static void kf_bfly3(
95 kiss_fft_cpx * Fout,
96 const size_t fstride,
97 const kiss_fft_cfg st,
98 size_t m
100{
101 size_t k=m;
102 const size_t m2 = 2*m;
103 kiss_fft_cpx *tw1,*tw2;
104 kiss_fft_cpx scratch[5];
105 kiss_fft_cpx epi3;
106 epi3 = st->twiddles[fstride*m];
107
108 tw1=tw2=st->twiddles;
109
110 do{
111 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
112
113 C_MUL(scratch[1],Fout[m] , *tw1);
114 C_MUL(scratch[2],Fout[m2] , *tw2);
115
116 C_ADD(scratch[3],scratch[1],scratch[2]);
117 C_SUB(scratch[0],scratch[1],scratch[2]);
118 tw1 += fstride;
119 tw2 += fstride*2;
120
121 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
122 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
123
124 C_MULBYSCALAR( scratch[0] , epi3.i );
125
126 C_ADDTO(*Fout,scratch[3]);
127
128 Fout[m2].r = Fout[m].r + scratch[0].i;
129 Fout[m2].i = Fout[m].i - scratch[0].r;
130
131 Fout[m].r -= scratch[0].i;
132 Fout[m].i += scratch[0].r;
133
134 ++Fout;
135 }while(--k);
136}
137
138static void kf_bfly5(
139 kiss_fft_cpx * Fout,
140 const size_t fstride,
141 const kiss_fft_cfg st,
142 int m
144{
145 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
146 int u;
147 kiss_fft_cpx scratch[13];
148 kiss_fft_cpx * twiddles = st->twiddles;
149 kiss_fft_cpx *tw;
150 kiss_fft_cpx ya,yb;
151 ya = twiddles[fstride*m];
152 yb = twiddles[fstride*2*m];
153
154 Fout0=Fout;
155 Fout1=Fout0+m;
156 Fout2=Fout0+2*m;
157 Fout3=Fout0+3*m;
158 Fout4=Fout0+4*m;
159
160 tw=st->twiddles;
161 for ( u=0; u<m; ++u ) {
162 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
163 scratch[0] = *Fout0;
164
165 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
166 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
167 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
168 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
169
170 C_ADD( scratch[7],scratch[1],scratch[4]);
171 C_SUB( scratch[10],scratch[1],scratch[4]);
172 C_ADD( scratch[8],scratch[2],scratch[3]);
173 C_SUB( scratch[9],scratch[2],scratch[3]);
174
175 Fout0->r += scratch[7].r + scratch[8].r;
176 Fout0->i += scratch[7].i + scratch[8].i;
177
178 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
179 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
180
181 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
182 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
183
184 C_SUB(*Fout1,scratch[5],scratch[6]);
185 C_ADD(*Fout4,scratch[5],scratch[6]);
186
187 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
188 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
189 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
190 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
191
192 C_ADD(*Fout2,scratch[11],scratch[12]);
193 C_SUB(*Fout3,scratch[11],scratch[12]);
194
195 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
196 }
197}
198
199/* perform the butterfly for one stage of a mixed radix FFT */
200static void kf_bfly_generic(
201 kiss_fft_cpx * Fout,
202 const size_t fstride,
203 const kiss_fft_cfg st,
204 int m,
205 int p
207{
208 int u,k,q1,q;
209 kiss_fft_cpx * twiddles = st->twiddles;
211 int Norig = st->nfft;
212
214
215 for ( u=0; u<m; ++u ) {
216 k=u;
217 for ( q1=0 ; q1<p ; ++q1 ) {
218 scratch[q1] = Fout[ k ];
219 C_FIXDIV(scratch[q1],p);
220 k += m;
221 }
222
223 k=u;
224 for ( q1=0 ; q1<p ; ++q1 ) {
225 int twidx=0;
226 Fout[ k ] = scratch[0];
227 for (q=1;q<p;++q ) {
228 twidx += fstride * k;
229 if (twidx>=Norig) twidx-=Norig;
230 C_MUL(t,scratch[q] , twiddles[twidx] );
231 C_ADDTO( Fout[ k ] ,t);
232 }
233 k += m;
234 }
235 }
236 KISS_FFT_TMP_FREE(scratch);
237}
238
239static
241 kiss_fft_cpx * Fout,
242 const kiss_fft_cpx * f,
243 const size_t fstride,
244 int in_stride,
245 int * factors,
246 const kiss_fft_cfg st
248{
249 kiss_fft_cpx * Fout_beg=Fout;
250 const int p=*factors++; /* the radix */
251 const int m=*factors++; /* stage's fft length/p */
252 const kiss_fft_cpx * Fout_end = Fout + p*m;
253
254#ifdef _OPENMP
255 // use openmp extensions at the
256 // top-level (not recursive)
257 if (fstride==1 && p<=5)
258 {
259 int k;
260
261 // execute the p different work units in different threads
262# pragma omp parallel for
263 for (k=0;k<p;++k)
264 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
265 // all threads have joined by this point
266
267 switch (p) {
268 case 2: kf_bfly2(Fout,fstride,st,m); break;
269 case 3: kf_bfly3(Fout,fstride,st,m); break;
270 case 4: kf_bfly4(Fout,fstride,st,m); break;
271 case 5: kf_bfly5(Fout,fstride,st,m); break;
272 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
273 }
274 return;
275 }
276#endif
277
278 if (m==1) {
279 do{
280 *Fout = *f;
281 f += fstride*in_stride;
282 }while(++Fout != Fout_end );
283 }else{
284 do{
285 // recursive call:
286 // DFT of size m*p performed by doing
287 // p instances of smaller DFTs of size m,
288 // each one takes every mth sample
289 // so0, s1, s2 become
290 // so, sm, s2m, s3m, ...
291 // s1, s(1+m), s(1+2m), ...
292 // s2, s(2+m), s(2+2m), ...
293 // etc.
294 kf_work( Fout , f, fstride*p, in_stride, factors,st);
295 f += fstride*in_stride;
296 }while( (Fout += m) != Fout_end );
297 }
298
299 Fout=Fout_beg;
300
301 // recombine the p smaller DFTs
302 switch (p) {
303 case 2: kf_bfly2(Fout,fstride,st,m); break;
304 case 3: kf_bfly3(Fout,fstride,st,m); break;
305 case 4: kf_bfly4(Fout,fstride,st,m); break;
306 case 5: kf_bfly5(Fout,fstride,st,m); break;
307 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
308 }
309}
310
311/* facbuf is populated by p1,m1,p2,m2, ...
312 where
313 p[i] * m[i] = m[i-1]
314 m0 = n */
315static
316void kf_factor(int n,int * facbuf) FL_NOEXCEPT
317{
318 int p=4;
319 double floor_sqrt;
320 floor_sqrt = fl::floor( fl::sqrt((double)n) );
321
322 /*factor out powers of 4, powers of 2, then any remaining primes */
323 do {
324 while (n % p) {
325 switch (p) {
326 case 4: p = 2; break;
327 case 2: p = 3; break;
328 default: p += 2; break;
329 }
330 if (p > floor_sqrt)
331 p = n; /* no more factors, skip to end */
332 }
333 n /= p;
334 *facbuf++ = p;
335 *facbuf++ = n;
336 } while (n > 1);
337}
338
339/*
340 *
341 * User-callable function to allocate all necessary storage space for the fft.
342 *
343 * The return value is a contiguous block of memory, cast to a kiss_fft_cfg.
344 *
345 * As such, the first cfg->sizeof_cfg bytes contain the configuration information.
346 * In a single-threaded program, subsequent calls to kiss_fft do not access this memory.
347 *
348 * The next section holds the twiddle factors for each stage.
349 *
350 * Note that the cfg is not guaranteed to be valid over time -- it should not be
351 * shared between threads and should not be used after a subsequent call to kiss_fft_alloc.
352 *
353 * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
354 * The returned value should be free()d when done to avoid memory leaks.
355 *
356 * The state can be placed in a user-supplied buffer 'mem':
357 * If mem==NULL, then a private internal buffer is allocated.
358 * If mem!=NULL, then mem must have size bytes. *lenmem gets set to the size of the cfg.
359 * On return, the first *lenmem words of mem will hold the configuration information.
360 * The remainder is scratchpad memory, safe to ignore or overwrite.
361 *
362 */
363kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) FL_NOEXCEPT
364{
366 size_t memneeded = sizeof(struct kiss_fft_state)
367 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
368
369 if ( lenmem==NULL ) {
370 st = (kiss_fft_cfg) KISS_FFT_MALLOC( memneeded );
371 }else{
372 if (mem != NULL && *lenmem >= memneeded)
373 st = (kiss_fft_cfg)mem;
374 *lenmem = memneeded;
375 }
376 if (st) {
377 int i;
378 st->nfft=nfft;
379 st->inverse = inverse_fft;
380
381 for (i=0;i<nfft;++i) {
382 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
383 double phase = -2*pi*i / nfft;
384 if (st->inverse)
385 phase *= -1;
386 kf_cexp(st->twiddles+i, phase );
387 }
388
390 }
391 return st;
392}
393
394
395void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) FL_NOEXCEPT
396{
397 if (fin == fout) {
398 //NOTE: this is not really an in-place FFT algorithm.
399 //It just performs an out-of-place FFT into a temp buffer
400 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
401 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
402 fl::memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
403 KISS_FFT_TMP_FREE(tmpbuf);
404 }else{
405 kf_work( fout, fin, 1, in_stride, st->factors,st );
406 }
407}
408
410{
411 kiss_fft_stride(cfg,fin,fout,1);
412}
413
414
416{
417 // nothing needed any more
418}
419
421{
422 while(1) {
423 int m=n;
424 while ( (m%2) == 0 ) m/=2;
425 while ( (m%3) == 0 ) m/=3;
426 while ( (m%5) == 0 ) m/=5;
427 if (m<=1)
428 break; /* n is completely factorable by twos, threes, and fives */
429 n++;
430 }
431 return n;
432}
#define C_FIXDIV(c, div)
#define HALF_OF(x)
#define C_ADD(res, a, b)
#define C_ADDTO(res, a)
#define C_SUB(res, a, b)
#define S_MUL(a, b)
#define C_MULBYSCALAR(c, s)
#define C_MUL(m, a, b)
#define KISS_FFT_TMP_FREE(ptr)
#define KISS_FFT_TMP_ALLOC(nbytes)
#define kf_cexp(x, phase)
int factors[2 *MAXFACTORS]
kiss_fft_cpx twiddles[1]
static uint32_t t
Definition Luminova.h:55
void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) FL_NOEXCEPT
#define NULL
void kiss_fft_cleanup(void) FL_NOEXCEPT
static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, const size_t m) FL_NOEXCEPT
static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m) FL_NOEXCEPT
kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) FL_NOEXCEPT
void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin, kiss_fft_cpx *fout, int in_stride) FL_NOEXCEPT
static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, size_t m) FL_NOEXCEPT
int kiss_fft_next_fast_size(int n) FL_NOEXCEPT
static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m) FL_NOEXCEPT
static void kf_factor(int n, int *facbuf) FL_NOEXCEPT
static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f, const size_t fstride, int in_stride, int *factors, const kiss_fft_cfg st) FL_NOEXCEPT
static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride, const kiss_fft_cfg st, int m, int p) FL_NOEXCEPT
struct kiss_fft_state * kiss_fft_cfg
Definition kiss_fft.h:89
#define KISS_FFT_MALLOC(nbytes)
Definition kiss_fft.h:60
kiss_fft_scalar r
Definition kiss_fft.h:85
kiss_fft_scalar i
Definition kiss_fft.h:86
void * memcpy(void *dest, const void *src, size_t n) FL_NOEXCEPT
constexpr enable_if< is_fixed_point< T >::value, T >::type sqrt(T x) FL_NOEXCEPT
constexpr enable_if< is_fixed_point< T >::value, T >::type floor(T x) FL_NOEXCEPT
#define FL_NOEXCEPT