FastLED 3.9.15
Loading...
Searching...
No Matches
fft_backend.h
Go to the documentation of this file.
1#pragma once
2
3// fft_backend.h — FFT backend dispatcher for fl::audio::fft
4//
5// Centralizes the forward real-to-complex FFT call so the implementation can
6// select between:
7// - kiss_fftr (third_party, portable, default everywhere)
8// - ESP-DSP dsps_fft2r_fc32 (ESP32*, auto-detected via FL_HAS_INCLUDE("esp_dsp.h"))
9// - CMSIS-DSP arm_rfft_fast_f32 (ARM Cortex-M4/M7/M33, FUTURE)
10//
11// ----------------------------------------------------------------------------
12// Hardware-FFT API survey (2026, per issue #2308 research)
13// ----------------------------------------------------------------------------
14// There is NO universal standardized real-FFT API across the embedded
15// ecosystem. The two leading candidates have semantically different output
16// layouts:
17//
18// kiss_fftr (FastLED current): N real in → N/2+1 `kiss_fft_cpx`. DC in
19// bin[0] (imag=0), Nyquist in bin[N/2]
20// (imag=0). Total output = (N/2+1)*2 floats.
21//
22// ESP-DSP dsps_fft2r_fc32: In-place N-complex FFT. For real input we
23// do the packed N/2-point complex + manual
24// unpack trick (this file). Separate
25// dsps_fft2r_init_* allocates global twiddle
26// tables (once per process).
27// Sizes: power-of-2, ≤ CONFIG_DSP_MAX_FFT_SIZE.
28//
29// CMSIS-DSP arm_rfft_fast_f32: ARM's de facto "standard" for Cortex-M4+.
30// Packs DC into out[0].r AND Nyquist into
31// out[0].i — total output = N floats. Must
32// be unpacked to kiss_fftr shape. Sizes
33// restricted to {32,64,128,256,512,1024,
34// 2048,4096}. Typical 4-5× speedup on
35// Cortex-M4 with FPU vs kiss_fftr.
36//
37// Teensy Audio Library: Wraps CMSIS-DSP under the hood; exposes
38// only magnitude not raw complex.
39//
40// ARM Cortex-M0+ (RP2040 etc.): No FPU. CMSIS-DSP offers Q15/Q31 only —
41// different output semantics that require
42// post-scaling by 1/(N/2). Not drop-in.
43//
44// Host (x86_64/aarch64): FFTW (gold standard), Apple vDSP, pocketfft
45// — all vendor-specific layouts.
46//
47// ----------------------------------------------------------------------------
48// Decision: keep kiss_fftr's layout as FastLED's internal abstraction.
49// ----------------------------------------------------------------------------
50// Rationale: kiss_fft_cpx {float r, i;} and the N/2+1-bin half-spectrum is
51// already what every downstream audio consumer assumes (magnitude, binning,
52// CQ kernels, windowing, AudioContext cache). Changing it would require
53// rewriting all detectors, and the ESP-DSP conversion glue ends up nearly as
54// complex as just keeping the shape. Each backend's conversion happens
55// privately inside `fl_fft_real_forward()`:
56//
57// kiss backend → identity (no conversion)
58// ESP-DSP backend → pack N reals as N/2 complex, complex FFT, unpack
59// conjugate-symmetric output to N/2+1 kiss_fft_cpx bins
60// CMSIS backend → call arm_rfft_fast_f32, then split out[0] into
61// DC and Nyquist positions expected by kiss layout
62//
63// ----------------------------------------------------------------------------
64// Auto-detect: ESP-DSP on ESP32 is typically 3-5× faster than kiss_fftr.
65// Expected cost on ESP32-S3: ~15 µs vs ~54 µs for the default 512-point
66// real FFT. See issue #2308 for the broader audio performance plan.
67//
68// Gate has moved from user opt-in (FL_FFT_USE_ESP_DSP=1) to automatic
69// `__has_include("esp_dsp.h")` detection — same pattern used for esp_cache.h
70// in src/platforms/esp/32/drivers/lcd_spi/. The ESP32 variants that do not
71// ship esp_dsp in their toolchain bundle (esp32c2 / esp32s2 / esp32h2 /
72// esp32c5) automatically fall through to the kiss_fftr scalar path; no per-
73// example `@filter` line is needed. See issue #2629 / PR #2625 for history.
74//
75// The math is the standard "real FFT from packed N/2-complex FFT" identity;
76// see inline comments on the unpack for derivation.
77
79#include "fl/stl/has_include.h"
80#include "platforms/is_platform.h"
81// IWYU pragma: begin_keep
83// IWYU pragma: end_keep
84
85// FL_FFT_ESP_DSP_AVAILABLE: ESP-DSP backend code is compiled in.
86// FL_FFT_ESP_DSP_ACTIVE: reserved for routing real FFT calls through it; it currently just mirrors AVAILABLE and the dispatcher still calls kiss_fftr.
87//
88// Both gate on `defined(FL_IS_ESP32) && FL_HAS_INCLUDE("esp_dsp.h")` so the
89// backend is silently elided on toolchains that do not provide esp_dsp.h
90// (esp32c2 / esp32s2 / esp32h2 / esp32c5). The AudioFftParity example
91// exercises the backend on any ESP32 board where the header is present.
92//
93// NOTE: the ESP-DSP implementation below is NOT yet bit-for-bit validated
94// against kiss_fftr — hardware sanity tests via AudioFftParity currently
95// report incorrect output (flat magnitudes for DC / single-tone inputs).
96// Root cause is under investigation; the dispatcher (`fl_fft_real_forward`)
97// stays on kiss_fftr by default. See issue #2308.
98#if defined(FL_IS_ESP32) && FL_HAS_INCLUDE("esp_dsp.h")
99#define FL_FFT_ESP_DSP_AVAILABLE 1
100#include "esp_dsp.h"
101#include "fl/stl/vector.h"
102#include "fl/log/log.h"
103#include "fl/math/math.h" // cosf / sinf wrappers
104#else
105#define FL_FFT_ESP_DSP_AVAILABLE 0
106#endif
107
108#define FL_FFT_ESP_DSP_ACTIVE FL_FFT_ESP_DSP_AVAILABLE
109
110namespace fl {
111namespace audio {
112namespace fft {
113
114#if FL_FFT_ESP_DSP_AVAILABLE
115
116namespace detail {
117
124struct EspDspRealCtx {
125 int n = 0; // full logical FFT size (N), power of 2
126 fl::vector<float> work; // packed complex buffer: size N (N/2 pairs)
127 fl::vector<float> cos_table; // cos(2πk/N) for k in [1, N/2), size N/2-1
128 fl::vector<float> sin_table; // sin(2πk/N) for k in [1, N/2), size N/2-1
129};
130
131inline EspDspRealCtx &espDspRealCtx() FL_NOEXCEPT {
132 static EspDspRealCtx ctx;
133 return ctx;
134}
135
136inline bool espDspGlobalInit() FL_NOEXCEPT {
137 static bool initialized = false;
138 if (initialized) return true;
139 esp_err_t err =
140 dsps_fft2r_init_fc32(nullptr, CONFIG_DSP_MAX_FFT_SIZE);
141 if (err != ESP_OK) {
142 FL_WARN("dsps_fft2r_init_fc32 failed: " << static_cast<int>(err));
143 return false;
144 }
145 initialized = true;
146 return true;
147}
148
149inline bool espDspEnsureTwiddles(int N) FL_NOEXCEPT {
150 EspDspRealCtx &ctx = espDspRealCtx();
151 if (ctx.n == N) return true;
152 if (!espDspGlobalInit()) return false;
153 ctx.work.resize(N);
154 ctx.cos_table.resize(N / 2 - 1);
155 ctx.sin_table.resize(N / 2 - 1);
156 const float twoPi = 6.28318530717958647692f;
157 const float invN = 1.0f / static_cast<float>(N);
158 for (int k = 1; k < N / 2; ++k) {
159 float th = twoPi * static_cast<float>(k) * invN;
160 ctx.cos_table[k - 1] = ::cosf(th);
161 ctx.sin_table[k - 1] = ::sinf(th);
162 }
163 ctx.n = N;
164 return true;
165}
166
182inline void espDspRealForward(int N, const float *in,
184 if (!espDspEnsureTwiddles(N)) return;
185 EspDspRealCtx &ctx = espDspRealCtx();
186
187 // Pack N real samples into N/2 complex pairs (in-place buffer).
188 // data[2k] = x[2k] (re)
189 // data[2k+1] = x[2k+1] (im)
190 for (int k = 0; k < N / 2; ++k) {
191 ctx.work[2 * k] = in[2 * k];
192 ctx.work[2 * k + 1] = in[2 * k + 1];
193 }
194
195 // N/2-point complex FFT + bit-reverse.
196 dsps_fft2r_fc32(ctx.work.data(), N / 2);
197 dsps_bit_rev_fc32_ansi(ctx.work.data(), N / 2);
198
199 // Unpack.
200 const float Y0r = ctx.work[0];
201 const float Y0i = ctx.work[1];
202 out[0].r = Y0r + Y0i;
203 out[0].i = 0.0f;
204 out[N / 2].r = Y0r - Y0i;
205 out[N / 2].i = 0.0f;
206
207 for (int k = 1; k < N / 2; ++k) {
208 const int kp = N / 2 - k;
209 const float Ykr = ctx.work[2 * k];
210 const float Yki = ctx.work[2 * k + 1];
211 const float Ykpr = ctx.work[2 * kp];
212 const float Ykpi = ctx.work[2 * kp + 1];
213 const float a = Ykr - Ykpr;
214 const float b = Yki + Ykpi;
215 const float c = ctx.cos_table[k - 1];
216 const float s = ctx.sin_table[k - 1];
217 out[k].r = 0.5f * ((Ykr + Ykpr) + c * b - s * a);
218 out[k].i = 0.5f * ((Yki - Ykpi) - c * a - s * b);
219 }
220}
221
222} // namespace detail
223
224#endif // FL_FFT_ESP_DSP_AVAILABLE
225
237inline void fl_fft_real_forward(kiss_fftr_cfg cfg, int N,
238 const kiss_fft_scalar *in,
240 // NOTE: even when FL_FFT_ESP_DSP_ACTIVE is 1, the dispatcher currently
241 // stays on kiss_fftr. The ESP-DSP backend below is `float`-only but
242 // kiss_fft_scalar is typedef'd to `int16_t` when FastLED's default
243 // FASTLED_FFT_PRECISION=FASTLED_FFT_FIXED16 is in effect. Auto-routing
244 // through ESP-DSP would break the int16 call sites in fft_impl.cpp.hpp.
245 //
246 // Future work: either (a) add an int16 packed-FFT variant using
247 // dsps_fft2r_sc16 to match FIXED16 mode, or (b) auto-switch the library
248 // to FASTLED_FFT_FLOAT when FL_FFT_ESP_DSP_AVAILABLE is on. Once wired,
249 // replace the below with a conditional call to detail::espDspRealForward.
250 //
251 // The ESP-DSP code is compiled in whenever the ESP32 toolchain ships
252 // esp_dsp.h (auto-detected via FL_HAS_INCLUDE) and exposed as
253 // fl::audio::fft::detail::espDspRealForward() so the AudioFftParity
254 // example can exercise it directly for validation.
255 (void)N;
256 kiss_fftr(cfg, in, out);
257}
258
259} // namespace fft
260} // namespace audio
261} // namespace fl
fl::UIAudio audio("Audio Input")
AudioAnalyzeFFT1024 fft
#define kiss_fft_scalar
Definition kiss_fft.h:69
kiss_fft_scalar r
Definition kiss_fft.h:85
kiss_fft_scalar i
Definition kiss_fft.h:86
void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar *timedata, kiss_fft_cpx *freqdata) FL_NOEXCEPT
struct kiss_fftr_state * kiss_fftr_cfg
Definition kiss_fftr.h:26
#define FL_WARN(X)
Definition log.h:276
Centralized logging categories for FastLED hardware interfaces and subsystems.
void fl_fft_real_forward(kiss_fftr_cfg cfg, int N, const kiss_fft_scalar *in, kiss_fft_cpx *out) FL_NOEXCEPT
Forward real-to-complex FFT.
Base definition for an LED controller.
Definition crgb.hpp:179
#define FL_NOEXCEPT