FastLED 3.9.15
Loading...
Searching...
No Matches
wave_simulation_real.cpp.hpp
Go to the documentation of this file.
1// Based on works and code by Shawn Silverman.
2
3#include "fl/stl/stdint.h"
4
5#include "fl/math/math.h"
7
8namespace fl {
9
10// Define Q15 conversion constants.
11// #define FIXED_SCALE (1 << 15) // 32768: 1.0 in Q15
12#define INT16_POS (32767) // Maximum value for i16
13#define INT16_NEG (-32768) // Minimum value for i16
14
15namespace wave_detail { // Anonymous namespace for internal linkage
16// Convert float to fixed Q15.
17// i16 float_to_fixed(float f) { return (i16)(f * FIXED_SCALE); }
18
19i16 float_to_fixed(float f) {
20 f = fl::clamp(f, -1.0f, 1.0f);
21 if (f < 0.0f) {
22 return (i16)(f * INT16_NEG);
23 } else {
24 return (i16)(f * INT16_POS); // Round to nearest
25 }
26}
27
28// Convert fixed Q15 to float.
29float fixed_to_float(i16 f) {
30 // return ((float)f) / FIXED_SCALE;
31 if (f < 0) {
32 return ((float)f) / INT16_NEG; // Negative values
33 } else {
34 return ((float)f) / INT16_POS; // Positive values
35 }
36}
37
38// // Multiply two Q15 fixed point numbers.
39// i16 fixed_mul(i16 a, i16 b) {
40// return (i16)(((i32)a * b) >> 15);
41// }
42
43// Precompute the Q15 damping decay factor for a power-of-two exponent.
44// The damping update is `f_new = f * (1 - 1/2^damp)`, equivalent to the
45// arithmetic-shift form `f -= f >> damp` (modulo 1-LSB rounding). Caching
46// it as Q15 here lets the kernel use a single Q15 multiply per cell and
47// opens the door to non-power-of-two damping if the public API ever
48// needs it (today it's still int-only). damp <= 0 means no decay.
50 if (damp <= 0) return INT16_POS; // ~1.0 — no decay
51 const float decay = 1.0f - 1.0f / static_cast<float>(1 << damp);
52 return float_to_fixed(decay);
53}
54} // namespace wave_detail
55
58 : length(len),
59 grid1(length + 2), // Initialize vector with correct size
60 grid2(length + 2), // Initialize vector with correct size
61 whichGrid(0),
62 // CFL stability bound for the explicit leapfrog 5-point stencil in 1D
63 // is C^2 <= 1. Negative speed has no physical meaning. Clamp at the
64 // boundary to keep the Q15 fixed-point kernel both stable and within
65 // i32 overflow margins (paired with the i64 promote in update()).
66 mCourantSq(wave_detail::float_to_fixed(fl::clamp(courantSq, 0.0f, 1.0f))),
69 // Additional initialization can be added here if needed.
70}
71
72void WaveSimulation1D_Real::setSpeed(float something) {
73 // See constructor for clamp rationale.
74 mCourantSq = wave_detail::float_to_fixed(fl::clamp(something, 0.0f, 1.0f));
75}
76
81
83
87
88i16 WaveSimulation1D_Real::geti16(fl::size x) const {
89 if (x >= length) {
90 FL_WARN("Out of range.");
91 return 0;
92 }
93 const i16 *curr = (whichGrid == 0) ? grid1.data() : grid2.data();
94 return curr[x + 1];
95}
96
98 if (x >= length) {
99 FL_WARN("Out of range.");
100 return 0;
101 }
102 const i16 *prev = (whichGrid == 0) ? grid2.data() : grid1.data();
103 return prev[x + 1];
104}
105
106float WaveSimulation1D_Real::getf(fl::size x) const {
107 if (x >= length) {
108 FL_WARN("Out of range.");
109 return 0.0f;
110 }
111 // Retrieve value from the active grid (offset by 1 for boundary).
112 const i16 *curr = (whichGrid == 0) ? grid1.data() : grid2.data();
113 return wave_detail::fixed_to_float(curr[x + 1]);
114}
115
116bool WaveSimulation1D_Real::has(fl::size x) const { return (x < length); }
117
118void WaveSimulation1D_Real::set(fl::size x, float value) {
119 if (x >= length) {
120 FL_WARN("warning X value too high");
121 return;
122 }
123 i16 *curr = (whichGrid == 0) ? grid1.data() : grid2.data();
125}
126
128 i16 *curr = (whichGrid == 0) ? grid1.data() : grid2.data();
129 i16 *next = (whichGrid == 0) ? grid2.data() : grid1.data();
130
131 // Update boundaries with a Neumann (zero-gradient) condition:
132 curr[0] = curr[1];
133 curr[length + 1] = curr[length];
134
135 i32 mCourantSq32 = static_cast<i32>(mCourantSq);
136 // Hoist the lower-saturation bound: in half-duplex mode the new value
137 // must be >= 0 (negative parts of the wave are clipped); in full-duplex
138 // mode the full Q15 negative range is allowed. Folding this into the
139 // per-cell clamp via fl::clamp lets us drop the dedicated second pass
140 // that used to walk the whole grid after the update loop.
141 const i32 q15_min = mHalfDuplex ? 0 : -32768;
142 // Iterate over each inner cell.
143 for (fl::size i = 1; i < length + 1; i++) {
144 // Compute the 1D Laplacian:
145 // lap = curr[i+1] - 2 * curr[i] + curr[i-1]
146 i32 lap =
147 (i32)curr[i + 1] - ((i32)curr[i] << 1) + curr[i - 1];
148
149 // Multiply the Laplacian by the simulation speed using Q15 arithmetic.
150 // Promote to i64 before the multiply. With the 1D CFL clamp at 1.0,
151 // max |mCourantSq32| = 32767 and max |lap| ~131,070 (saturated
152 // alternating cells) give a worst-case product of ~4.3e9 — past
153 // i32 max (2.15e9). The i64 promote also acts as a safety net if
154 // the clamp is ever regressed; pre-clamp the same product could
155 // already reach ~4.3e9 in 1D and ~8.6e9 in 2D.
156 i32 term = static_cast<i32>(
157 (static_cast<i64>(mCourantSq32) * lap) >> 15);
158
159 // Compute the new value:
160 // f = -next[i] + 2 * curr[i] + term
161 i32 f = -(i32)next[i] + ((i32)curr[i] << 1) + term;
162
163 // Apply damping: use a precomputed Q15 multiplier in place of the
164 // arithmetic shift. Functionally equivalent for power-of-two damp
165 // (modulo 1-LSB rounding) but generalizes cleanly to non-power-of-
166 // two damping if the public API ever needs it. On Cortex-M4+ this
167 // lowers to a single smmlsr/smulwb; on AVR it's a 16x32 multiply
168 // (~30 cycles) — same ballpark as the shift but cleaner semantics.
169 f = static_cast<i32>(
170 (static_cast<i64>(f) * mDampDecayQ15) >> 15);
171
172 // Clamp f into [q15_min, 32767] in a single step. q15_min is 0 when
173 // half-duplex is on, -32768 otherwise. This subsumes both the Q15
174 // saturation clamp and the post-pass that used to zero negatives.
175 next[i] = static_cast<i16>(fl::clamp(f, q15_min, static_cast<i32>(32767)));
176 }
177
178 // Toggle the active grid.
179 whichGrid ^= 1;
180}
181
183 float speed, float dampening) FL_NOEXCEPT
184 : width(W), height(H), stride(W + 2),
185 grid1((W + 2) * (H + 2)),
186 grid2((W + 2) * (H + 2)), whichGrid(0),
187 // CFL stability bound for the explicit leapfrog 5-point stencil in 2D
188 // is C^2 <= 1/2. Negative speed has no physical meaning. Clamp at the
189 // boundary to keep the Q15 fixed-point kernel both stable and within
190 // i32 overflow margins (paired with the i64 promote in update()).
192 // Dampening exponent; e.g., 6 means a factor of 2^6 = 64.
193 mDampening(static_cast<int>(dampening)),
195
196void WaveSimulation2D_Real::setSpeed(float something) {
197 // See constructor for clamp rationale.
198 mCourantSq = wave_detail::float_to_fixed(fl::clamp(something, 0.0f, 0.5f));
199}
200
205
207
211
212float WaveSimulation2D_Real::getf(fl::size x, fl::size y) const {
213 if (x >= width || y >= height) {
214 FL_WARN("Out of range: " << x << ", " << y);
215 return 0.0f;
216 }
217 const i16 *curr = (whichGrid == 0 ? grid1.data() : grid2.data());
218 return wave_detail::fixed_to_float(curr[(y + 1) * stride + (x + 1)]);
219}
220
221i16 WaveSimulation2D_Real::geti16(fl::size x, fl::size y) const {
222 if (x >= width || y >= height) {
223 FL_WARN("Out of range: " << x << ", " << y);
224 return 0;
225 }
226 const i16 *curr = (whichGrid == 0 ? grid1.data() : grid2.data());
227 return curr[(y + 1) * stride + (x + 1)];
228}
229
230i16 WaveSimulation2D_Real::geti16Previous(fl::size x, fl::size y) const {
231 if (x >= width || y >= height) {
232 FL_WARN("Out of range: " << x << ", " << y);
233 return 0;
234 }
235 const i16 *prev = (whichGrid == 0 ? grid2.data() : grid1.data());
236 return prev[(y + 1) * stride + (x + 1)];
237}
238
239bool WaveSimulation2D_Real::has(fl::size x, fl::size y) const {
240 return (x < width && y < height);
241}
242
243void WaveSimulation2D_Real::setf(fl::size x, fl::size y, float value) {
245 return seti16(x, y, v);
246}
247
248void WaveSimulation2D_Real::seti16(fl::size x, fl::size y, i16 value) {
249 if (x >= width || y >= height) {
250 FL_WARN("Out of range: " << x << ", " << y);
251 return;
252 }
253 i16 *curr = (whichGrid == 0 ? grid1.data() : grid2.data());
254 curr[(y + 1) * stride + (x + 1)] = value;
255}
256
258 i16 *curr = (whichGrid == 0 ? grid1.data() : grid2.data());
259 i16 *next = (whichGrid == 0 ? grid2.data() : grid1.data());
260
261 // Update horizontal boundaries.
262 for (fl::size j = 0; j < height + 2; ++j) {
263 if (mXCylindrical) {
264 curr[j * stride + 0] = curr[j * stride + width];
265 curr[j * stride + (width + 1)] = curr[j * stride + 1];
266 } else {
267 curr[j * stride + 0] = curr[j * stride + 1];
268 curr[j * stride + (width + 1)] = curr[j * stride + width];
269 }
270 }
271
272 // Update vertical boundaries.
273 for (fl::size i = 0; i < width + 2; ++i) {
274 curr[0 * stride + i] = curr[1 * stride + i];
275 curr[(height + 1) * stride + i] = curr[height * stride + i];
276 }
277
278 i32 mCourantSq32 = static_cast<i32>(mCourantSq);
279 // Hoist the lower-saturation bound — see WaveSimulation1D_Real::update()
280 // for the rationale. Fold half-duplex into the per-cell clamp instead
281 // of running a second pass over the grid (especially expensive on
282 // PSRAM-backed grids).
283 const i32 q15_min = mHalfDuplex ? 0 : -32768;
284
285 // Update each inner cell. Per-row hoist: lift j*stride out of the inner
286 // loop, set up row pointers to curr/next/above/below, and mark them
287 // FL_RESTRICT_PARAM so the optimizer knows curr and next don't alias.
288 // This gives the compiler a clean dependency picture across iterations
289 // (it can vectorize / interleave loads without re-deriving the address).
290 //
291 // Outer-level branch on the stencil keeps the inner loop branchless;
292 // the two kernels are otherwise identical (Q15 multiply, damping,
293 // clamp). FivePoint is the backward-compatible default; the wrapper
294 // class WaveSimulation2D auto-selects NinePointIsotropic at high
295 // super-sample factors where the anisotropy of the 5-point stencil
296 // becomes visually obvious.
297 const bool nine_point = (mStencil == LaplacianStencil::NinePointIsotropic);
298 for (fl::size j = 1; j <= height; ++j) {
299 const fl::size row = j * stride;
300 const i16* FL_RESTRICT_PARAM row_curr = curr + row;
301 const i16* FL_RESTRICT_PARAM row_above = curr + (row - stride);
302 const i16* FL_RESTRICT_PARAM row_below = curr + (row + stride);
303 i16* FL_RESTRICT_PARAM row_next = next + row;
304 for (fl::size i = 1; i <= width; ++i) {
305 const i32 c = row_curr[i];
306 i32 laplacian;
307 if (nine_point) {
308 // 9-point isotropic Laplacian, scaled up by 6 to keep
309 // everything in integer arithmetic:
310 // 6 * lap = (NW+NE+SW+SE) + 4*(N+S+E+W) - 20*C
311 // The /6 is folded into the term computation below so we
312 // pay it once per cell rather than four times.
313 const i32 diag = (i32)row_above[i - 1] + row_above[i + 1] +
314 row_below[i - 1] + row_below[i + 1];
315 const i32 nbr = (i32)row_above[i] + row_below[i] +
316 row_curr[i - 1] + row_curr[i + 1];
317 laplacian = diag + (nbr << 2) - 20 * c;
318 } else {
319 // Standard 5-point Laplacian: N + S + E + W - 4*C.
320 laplacian = (i32)row_curr[i + 1] + row_curr[i - 1] +
321 row_above[i] + row_below[i] - (c << 2);
322 }
323 // Promote to i64 before the multiply. With the 2D CFL clamp at
324 // 0.5, the 5-point worst case product is ~4.3e9 and the
325 // 9-point (scaled by 6, so |lap| ~6x larger) is ~2.6e10 — both
326 // past i32 max (2.15e9). The i64 promote also acts as a safety
327 // net if the clamp is ever regressed.
328 i64 product = static_cast<i64>(mCourantSq32) * laplacian;
329 i32 term;
330 if (nine_point) {
331 // Undo the x6 scaling on the 9-point Laplacian here.
332 // Integer division by a compile-time-constant 6 is lowered
333 // to a reciprocal-multiply on Cortex-M4+ and to a small
334 // shift+add on AVR; either way it's well under the cost
335 // of computing the Laplacian itself.
336 term = static_cast<i32>((product >> 15) / 6);
337 } else {
338 term = static_cast<i32>(product >> 15);
339 }
340 // f = -next[index] + 2 * curr[index] + mCourantSq * laplacian.
341 i32 f = -(i32)row_next[i] + (c << 1) + term;
342
343 // Apply damping with the precomputed Q15 decay multiplier —
344 // see the 1D update for the rationale.
345 f = static_cast<i32>(
346 (static_cast<i64>(f) * mDampDecayQ15) >> 15);
347
348 // Clamp f into [q15_min, 32767] in a single step — subsumes
349 // both the Q15 saturation clamp and the half-duplex zero pass.
350 row_next[i] = static_cast<i16>(fl::clamp(f, q15_min, static_cast<i32>(32767)));
351 }
352 }
353
354 // Swap the roles of the grids.
355 whichGrid ^= 1;
356}
357
358} // namespace fl
uint16_t speed
Definition Noise.ino:66
static const int H
Definition PerfDisc.ino:20
static const int W
Definition PerfDisc.ino:19
WaveSimulation1D_Real(u32 length, float speed=0.16f, int dampening=6) FL_NOEXCEPT
void set(fl::size x, float value)
void setDampening(int damp) FL_NOEXCEPT
void setf(fl::size x, fl::size y, float value)
i16 geti16Previous(fl::size x, fl::size y) const
i16 geti16(fl::size x, fl::size y) const
fl::vector_psram< i16 > grid1
WaveSimulation2D_Real(u32 W, u32 H, float speed=0.16f, float dampening=6.0f) FL_NOEXCEPT
float getf(fl::size x, fl::size y) const
void seti16(fl::size x, fl::size y, i16 value)
bool has(fl::size x, fl::size y) const
fl::vector_psram< i16 > grid2
void setDampening(int damp) FL_NOEXCEPT
fl::UISlider length("Length", 1.0f, 0.0f, 1.0f, 0.01f)
fl::UISlider dampening("Dampening", 6.0f, 0.0f, 10.0f, 0.1f)
#define FL_WARN(X)
Definition log.h:276
i16 compute_damp_decay_q15(int damp) FL_NOEXCEPT
constexpr int type_rank< T >::value
u8 u8 height
Definition blur.h:186
fl::i64 i64
Definition s16x16x4.h:222
u8 width
Definition blur.h:186
constexpr enable_if< is_fixed_point< T >::value, T >::type clamp(T x, T lo, T hi) FL_NOEXCEPT
Base definition for an LED controller.
Definition crgb.hpp:179
#define FL_RESTRICT_PARAM
#define FL_NOEXCEPT
#define INT16_NEG
#define INT16_POS