FastLED 3.9.15
Loading...
Searching...
No Matches
chasing_spirals.cpp.hpp
Go to the documentation of this file.
1// Chasing Spirals — three implementations (Float, Q31 scalar, Q31 SIMD).
2//
3// All variants share setupChasingSpiralFrame() which builds a per-pixel SoA
4// geometry cache (base_angle, dist_scaled, radial filters, pixel_idx) and
5// a Perlin fade LUT. Per-frame constants (center, linear/radial offsets)
6// are computed once and passed via FrameSetup.
7//
8// Q31 scalar: Batches 3 channel sincos into one sincos32_simd call per pixel,
9// then evaluates Perlin noise and radial filter per channel.
10// Q31 SIMD: Processes 4 pixels at a time with full SIMD pipeline (aligned
11// SoA loads → sincos32_simd → Perlin → clamp/scale → scatter).
12// Perlin exits to scalar per-lane (SSE2 has no integer gather).
13
14#include "fl/stl/align.h"
21#include "fl/math/simd.h"
22#include "fl/math/sin32.h"
24
27
28namespace fl {
29
30namespace {
31
32using FP = fl::s16x16;
34
35// Common setup values returned by setupChasingSpiralFrame.
36// Carries raw SoA pointers (no PixelLUT AoS struct).
37struct FrameSetup {
    // Bundle of per-frame values produced by setupChasingSpiralFrame() and
    // consumed by the Q31 scalar and SIMD draw paths.
    //
    // NOTE(review): this listing appears to omit two members. The aggregate
    // initializer in setupChasingSpiralFrame() passes a leading total_pixels
    // value and a trailing e->mCtx->leds value that have no visible
    // declaration here — confirm against the real source.

    // Per-pixel SoA arrays of raw s16x16 values, filled in
    // setupChasingSpiralFrame(): 3*theta - dist/3, dist*0.1, and the radial
    // filter rf scaled by 3, 1/2 and 1/4 respectively.
39 const fl::i32 *base_angle;
40 const fl::i32 *dist_scaled;
41 const fl::i32 *rf3;
42 const fl::i32 *rf_half;
43 const fl::i32 *rf_quarter;
    // Per-pixel output index into the LED array (result of the xyMap function).
44 const fl::u16 *pixel_idx;
    // Tables handed through to the Perlin noise routines.
45 const fl::i32 *fade_lut;
46 const fl::u8 *perm;
    // Per-frame constants as raw s16x16: scaled center, then the three
    // per-channel linear offsets and the three per-channel radial offsets.
47 fl::i32 cx_raw;
48 fl::i32 cy_raw;
49 fl::i32 lin0_raw;
50 fl::i32 lin1_raw;
51 fl::i32 lin2_raw;
52 fl::i32 rad0_raw;
53 fl::i32 rad1_raw;
54 fl::i32 rad2_raw;
56};
57
58// Convert s16x16 angle (radians) to A24 format for sincos32
59FASTLED_FORCE_INLINE u32 radiansToA24(i32 base_s16x16, i32 offset_s16x16) {
60 constexpr i32 RAD_TO_A24 = 2670177;
61 return static_cast<u32>((static_cast<i64>(base_s16x16 + offset_s16x16) * RAD_TO_A24) >> FP::FRAC_BITS);
62}
63
64// Compute Perlin coordinate from sincos result and distance
65FASTLED_FORCE_INLINE i32 perlinCoord(i32 sc_val, i32 dist_raw, i32 offset) {
66 return offset - static_cast<i32>((static_cast<i64>(sc_val) * dist_raw) >> 31);
67}
68
69// Clamp s16x16 value to [0, 1] and scale to [0, 255]
// NOTE(review): the signature line for this function is missing from this
// listing; the symbol index at the end of the page gives it as
// `FASTLED_FORCE_INLINE i32 clampAndScale255(i32 raw_s16x16)`.
//
// The input is clamped to [0, FP_ONE] and then multiplied by 255, so the
// returned value is still a raw s16x16 quantity, in [0, 255 * FP_ONE].
71 constexpr i32 FP_ONE = static_cast<i32>(1) << FP::FRAC_BITS;
72 if (raw_s16x16 < 0) raw_s16x16 = 0;
73 if (raw_s16x16 > FP_ONE) raw_s16x16 = FP_ONE;
    // (x << 8) - x == 256*x - x == 255*x, without a multiply.
74 return (raw_s16x16 << 8) - raw_s16x16;
75}
76
77// Apply radial filter to noise value and clamp to [0, 255]
78FASTLED_FORCE_INLINE i32 applyRadialFilter(i32 noise_255, i32 rf_raw) {
79 i32 result = static_cast<i32>((static_cast<i64>(noise_255) * rf_raw) >> (FP::FRAC_BITS * 2));
80 if (result < 0) result = 0;
81 if (result > 255) result = 255;
82 return result;
83}
84
85// Load 4 aligned i32 values from an SoA array into a SIMD register.
86//
87// FrameSetup pointers already carry assume_aligned<16> from setupChasingSpiralFrame(),
88// but reinterpret_cast<const u32*> strips the compiler's alignment metadata.
89// Re-asserting assume_aligned<16> here restores the hint so the compiler can
90// emit aligned SIMD loads (e.g. movaps/movdqa on x86) instead of unaligned ones
91// (movups/movdqu), which avoids a micro-op penalty on older cores and removes
92// a redundant alignment check on modern ones.
// NOTE(review): the signature line is missing from this listing; the symbol
// index at the end of the page gives it as
// `FASTLED_FORCE_INLINE simd::simd_u32x4 loadAligned(const i32 *arr, int i)`.
//
// Reads starting at arr + i through load_u32_4_aligned. Because
// assume_aligned<16> is asserted on arr + i (not just on arr), the caller has
// to pass an i for which arr + i is itself 16-byte aligned — presumably a
// multiple of 4 on a 16-byte-aligned array; confirm at call sites.
94 return simd::load_u32_4_aligned(
95 fl::assume_aligned<16>(reinterpret_cast<const u32*>(arr + i))); // ok reinterpret cast
96}
97
98// Write one pixel from per-channel SIMD registers at the given lane.
// NOTE(review): the signature lines are missing from this listing; the symbol
// index at the end of the page gives the signature as
// `FASTLED_FORCE_INLINE void scatterPixel(fl::span< CRGB > leds, u16 idx,
//  simd::simd_u32x4 r, simd::simd_u32x4 g, simd::simd_u32x4 b, int lane)`.
//
// Takes the value at `lane` from each of the r/g/b registers, narrows each to
// u8 with a static_cast (no clamping happens here, so inputs are expected to
// be in [0, 255] already) and stores the resulting CRGB at leds[idx].
101 leds[idx] = CRGB(static_cast<u8>(simd::extract_u32_4(r, lane)),
102 static_cast<u8>(simd::extract_u32_4(g, lane)),
103 static_cast<u8>(simd::extract_u32_4(b, lane)));
104}
105
106// Process one color channel for 4 pixels using a full SIMD pipeline.
107// Returns 4 clamped [0, 255] channel values.
// NOTE(review): the first line of the signature is missing from this listing;
// the symbol index at the end of the page shows the function as
// `simd::simd_u32x4 simd4_processChannel(...)` with the parameters below.
//
// Parameters:
//   base_vec, dist_vec  - four pixels' base angle and scaled distance (raw s16x16)
//   radial_offset       - per-channel angle offset added to every lane
//   linear_offset       - per-channel offset added to cx_raw for the x coordinate
//   fade_lut, perm      - tables forwarded to the Perlin noise call
//   cx_raw, cy_raw      - raw s16x16 center used as the coordinate origin
//   rf_vec              - four pixels' radial filter factor for this channel
109 simd::simd_u32x4 base_vec, simd::simd_u32x4 dist_vec,
110 i32 radial_offset, i32 linear_offset,
111 const i32 *fade_lut, const u8 *perm, i32 cx_raw, i32 cy_raw,
112 simd::simd_u32x4 rf_vec) {
113
    // Same constant as the scalar radiansToA24() helper.
114 constexpr i32 RAD_TO_A24 = 2670177;
115
116 // Angle conversion: Q16.16 → A24
117 auto offset_vec = simd::set1_u32_4(static_cast<u32>(radial_offset));
118 auto sum_vec = simd::add_i32_4(base_vec, offset_vec);
119 auto rad_const_vec = simd::set1_u32_4(static_cast<u32>(RAD_TO_A24));
    // Presumably the lane-wise counterpart of "(i64)sum * RAD_TO_A24 >> FRAC_BITS"
    // in radiansToA24() — TODO confirm mulhi_su32_4's shift amount.
120 auto angles_vec = simd::mulhi_su32_4(sum_vec, rad_const_vec);
121
122 SinCos32_simd sc = sincos32_simd(angles_vec);
123
124 // Perlin coordinates: nx = lin+cx - cos*dist, ny = cy - sin*dist
125 auto lin_cx = simd::set1_u32_4(static_cast<u32>(linear_offset + cx_raw));
126 auto cy_vec = simd::set1_u32_4(static_cast<u32>(cy_raw));
    // mulhi32 followed by a left shift of 1 presumably stands in for the
    // scalar path's ">> 31" in perlinCoord() (high 32 bits, then *2), which
    // would leave the lowest result bit at 0 — TODO confirm mulhi32_i32_4.
127 auto nx_vec = simd::sub_i32_4(lin_cx,
128 simd::sll_u32_4(simd::mulhi32_i32_4(sc.cos_vals, dist_vec), 1));
129 auto ny_vec = simd::sub_i32_4(cy_vec,
130 simd::sll_u32_4(simd::mulhi32_i32_4(sc.sin_vals, dist_vec), 1));
131
132 // Perlin noise (SIMD floor/frac/wrap, scalar fade/perm/grad/lerp per lane)
    // NOTE(review): the line that starts this statement (listing line 133) is
    // missing here. It evidently assigns raw_vec, used below; the page index
    // lists Perlin-style `pnoise2d_raw_simd4_vec(nx_vec, ny_vec, fade_lut, perm)`
    // as the likely callee.
134 nx_vec, ny_vec, fade_lut, perm);
135
136 // Clamp [0, FP_ONE], scale ×255, apply radial filter, clamp [0, 255]
137 auto zero = simd::set1_u32_4(0u);
138 auto fp_one = simd::set1_u32_4(static_cast<u32>(static_cast<i32>(1) << FP::FRAC_BITS));
139 auto clamped = simd::min_i32_4(simd::max_i32_4(raw_vec, zero), fp_one);
    // (x << 8) - x == 255 * x, the same trick as clampAndScale255().
140 auto noise_scaled = simd::sub_i32_4(simd::sll_u32_4(clamped, 8), clamped);
141
142 auto max255 = simd::set1_u32_4(255u);
    // Lane-wise counterpart of applyRadialFilter(): product, then saturate to
    // [0, 255].
143 auto result = simd::mulhi32_i32_4(noise_scaled, rf_vec);
144 return simd::min_i32_4(simd::max_i32_4(result, zero), max255);
145}
146
147// Extract common frame setup logic shared by all variants.
148// Builds SoA geometry cache lazily (once when grid size changes).
149// state is the caller's per-instance ChasingSpiralState member (not a global).
// NOTE(review): the signature line is missing from this listing; the symbol
// index at the end of the page gives it as
// `FrameSetup setupChasingSpiralFrame(Context &ctx, ChasingSpiralState &state)`.
//
// On each call this sets the engine timings and calls calculate_oscillators(),
// converts the per-frame float inputs (center, linear and radial offsets) to
// raw s16x16, rebuilds the per-pixel SoA cache held in `state` when the pixel
// count differs from state.count, and returns the lot as a FrameSetup.
151 auto *e = ctx.mEngine.get();
152 e->get_ready();
153
154 // Timing (once per frame, float is fine here)
155 e->timings.master_speed = 0.01;
156 e->timings.ratio[0] = 0.1;
157 e->timings.ratio[1] = 0.13;
158 e->timings.ratio[2] = 0.16;
    // NOTE(review): ratio uses indices 0..2 but offset uses 1..3. The float
    // path does the same, so this looks deliberate — confirm.
159 e->timings.offset[1] = 10;
160 e->timings.offset[2] = 20;
161 e->timings.offset[3] = 30;
162 e->calculate_oscillators(e->timings);
163
164 const int num_x = e->num_x;
165 const int num_y = e->num_y;
166 const int total_pixels = num_x * num_y;
167
168 // Per-frame constants (float->FP boundary conversions)
169 constexpr FP scale(0.1f);
170 const FP radius_fp(e->radial_filter_radius);
171 const FP center_x_scaled = FP(e->animation.center_x * 0.1f);
172 const FP center_y_scaled = FP(e->animation.center_y * 0.1f);
173
174 const FP radial0(e->move.radial[0]);
175 const FP radial1(e->move.radial[1]);
176 const FP radial2(e->move.radial[2]);
177
178 // Reduce linear offsets modulo the Perlin noise period before converting
179 // to s16x16. Two reasons:
180 // 1. Prevents s16x16 overflow (range ±32767 in integer part).
181 // 2. Float32 precision fix: matches the same reduction applied in
182 // Chasing_Spirals_Float (animartrix v1 and v2 float paths) so both
183 // paths compute identical Perlin coordinates at all time values.
184 // Without this reduction, float32 loses per-pixel coordinate precision
185 // when move.linear grows large (ULP at 200,000 ≈ 0.024 > pixel step 0.1).
186 // Perlin noise is exactly periodic with period 256 at integer coordinates,
187 // so with scale_x=0.1 the effective period for offset_x is 256/0.1 = 2560.
188 // See: tests/fl/fx/2d/animartrix2.cpp "period reduction" test cases.
189 constexpr float perlin_period = 2560.0f; // 256.0f / scale_x(0.1f)
190 constexpr float scale_f = 0.1f;
191 const FP linear0_scaled = FP(fmodf(e->move.linear[0], perlin_period) * scale_f);
192 const FP linear1_scaled = FP(fmodf(e->move.linear[1], perlin_period) * scale_f);
193 const FP linear2_scaled = FP(fmodf(e->move.linear[2], perlin_period) * scale_f);
194
195 constexpr FP three_fp(3.0f);
196 constexpr FP one(1.0f);
197
198 // Build per-pixel SoA geometry (once when grid size changes)
    // NOTE(review): the cache is keyed on the pixel count alone. A change of
    // num_x/num_y that keeps the same product, or a change of
    // radial_filter_radius, polar_theta, distance or the xyMap function,
    // would not trigger a rebuild — confirm callers reset state.count in
    // those cases.
199 if (state.count != total_pixels) {
200 const int padded = (total_pixels + 3) & ~3; // multiple of 4 for SIMD safety
201 state.base_angle.resize(padded, 0);
202 state.dist_scaled.resize(padded, 0);
203 state.rf3.resize(padded, 0);
204 state.rf_half.resize(padded, 0);
205 state.rf_quarter.resize(padded, 0);
206 state.pixel_idx.resize(padded, 0);
207
    // NOTE(review): a zero radius_fp makes this a division by zero —
    // presumably radial_filter_radius is always positive; confirm.
208 const FP inv_radius = one / radius_fp;
209 const FP one_third = one / three_fp;
    // idx advances with y as the inner loop, so SoA slot idx holds the pixel
    // at x = idx / num_y, y = idx % num_y. Slots from total_pixels up to
    // padded are not written by this loop.
210 int idx = 0;
211 for (int x = 0; x < num_x; x++) {
212 for (int y = 0; y < num_y; y++) {
213 const FP theta(e->polar_theta[x][y]);
214 const FP dist(e->distance[x][y]);
    // rf = (radius - dist) / radius, the same falloff the float path
    // computes per pixel.
215 const FP rf = (radius_fp - dist) * inv_radius;
216 state.base_angle[idx] = (three_fp * theta - dist * one_third).raw();
217 state.dist_scaled[idx] = (dist * scale).raw();
    // Channel weights baked into the filter: x3, x1/2 and x1/4.
218 state.rf3[idx] = (three_fp * rf).raw();
219 state.rf_half[idx] = (rf >> 1).raw();
220 state.rf_quarter[idx] = (rf >> 2).raw();
    // NOTE(review): FrameSetup exposes pixel_idx as u16, so a mapped
    // index above 65535 would not fit — confirm the xyMapFn range.
221 state.pixel_idx[idx] = e->mCtx->xyMapFn(x, y, e->mCtx->xyMapUserData);
222 idx++;
223 }
224 }
225 state.count = total_pixels;
226 }
227
228 // Initialize Perlin fade LUT once per state lifetime
229 if (!state.fade_lut_initialized) {
    // NOTE(review): the statement on listing line 230 is missing here. The
    // page index lists `static void init_fade_lut(fl::i32 *table)`, which is
    // presumably what fills the table at this point — confirm.
231 state.fade_lut_initialized = true;
232 }
233
234 const i32 cx_raw = center_x_scaled.raw();
235 const i32 cy_raw = center_y_scaled.raw();
236 const i32 lin0_raw = linear0_scaled.raw();
237 const i32 lin1_raw = linear1_scaled.raw();
238 const i32 lin2_raw = linear2_scaled.raw();
239 const i32 rad0_raw = radial0.raw();
240 const i32 rad1_raw = radial1.raw();
241 const i32 rad2_raw = radial2.raw();
242
243 // Stamp alignment on SoA pointers at the source so every downstream
244 // consumer (Q31 scalar loop, SIMD 4-wide loop, loadAligned helper)
245 // inherits the hint without needing per-site annotations.
246 //
247 // Why this matters for performance:
248 // 1. The SIMD path calls loadAligned() which feeds load_u32_4_aligned().
249 // With the alignment hint the compiler emits movdqa/movaps (aligned
250 // 128-bit loads) instead of movdqu/movups (unaligned). On older x86
251 // (pre-Nehalem) unaligned loads are significantly slower; on modern
252 // cores they still cost an extra micro-op when the address crosses a
253 // cache-line boundary.
254 // 2. The Q31 scalar path benefits too: the compiler can widen scalar
255 // i32 loads into SIMD gathers or auto-vectorize more aggressively
256 // when it knows the base pointer is 16-byte aligned.
257 // 3. fade_lut (256-entry i32 Perlin fade table) is accessed in every
258 // Perlin noise evaluation; the alignment hint lets the compiler
259 // assume cache-line-friendly access patterns.
260 //
261 // The underlying SoA arrays are allocated with FL_ALIGNAS(16) in
262 // ChasingSpiralState, so this is a promise (not a request).
263 // pixel_idx is u16 (2 bytes) and not SIMD-loaded, so no hint needed.
264 return FrameSetup{
265 total_pixels,
266 fl::assume_aligned<16>(state.base_angle.data()),
267 fl::assume_aligned<16>(state.dist_scaled.data()),
268 fl::assume_aligned<16>(state.rf3.data()),
269 fl::assume_aligned<16>(state.rf_half.data()),
270 fl::assume_aligned<16>(state.rf_quarter.data()),
271 state.pixel_idx.data(),
    // NOTE(review): one initializer (listing line 272) is missing here. By
    // FrameSetup member order it is the fade_lut pointer.
273 PERLIN_NOISE,
274 cx_raw,
275 cy_raw,
276 lin0_raw,
277 lin1_raw,
278 lin2_raw,
279 rad0_raw,
280 rad1_raw,
281 rad2_raw,
282 e->mCtx->leds
283 };
284}
285
286} // anonymous namespace
287
288// ============================================================================
289// Float Implementation (original algorithm, uses v2 Engine)
290// ============================================================================
291
// NOTE(review): the signature line is missing from this listing; the symbol
// index at the end of the page lists `void draw(Context &ctx) override` once
// for each of the three variants.
//
// Float path: for every (x, y) it calls e->render_value() three times, once
// per colour channel, then applies the radial falloff and writes the pixel
// through the engine.
293 auto *e = ctx.mEngine.get();
294 e->get_ready();
295
296 // Perlin noise is periodic with period 256 at integer coordinates.
297 // scale_x = 0.1, so the effective period for offset_x is 256/0.1 = 2560.
298 // Reducing move.linear[i] modulo this period before use keeps float32
299 // coordinate arithmetic precise even at extreme uptime values.
300 // Without this, float32 loses per-pixel precision when adding a small
301 // per-pixel term (~0.1) to a large offset (e.g. 200,000), since float32
302 // ULP at that magnitude (~0.024) is coarser than the per-pixel step.
303 // This matches the reduction already applied in setupChasingSpiralFrame
304 // for the Q31 path, keeping both paths in agreement at all time values.
305 static constexpr float perlin_period = 2560.0f; // 256.0f / scale_x(0.1f)
306
    // Same timing values as setupChasingSpiralFrame().
307 e->timings.master_speed = 0.01;
308 e->timings.ratio[0] = 0.1;
309 e->timings.ratio[1] = 0.13;
310 e->timings.ratio[2] = 0.16;
311 e->timings.offset[1] = 10;
312 e->timings.offset[2] = 20;
313 e->timings.offset[3] = 30;
314 e->calculate_oscillators(e->timings);
315
316 for (int x = 0; x < e->num_x; x++) {
317 for (int y = 0; y < e->num_y; y++) {
    // Channel 1. scale_*, offset_y, offset_z and z are assigned only
    // here; the next two render_value() calls reuse them and change
    // just angle, dist and offset_x.
318 e->animation.angle =
319 3 * e->polar_theta[x][y] + e->move.radial[0] -
320 e->distance[x][y] / 3;
321 e->animation.dist = e->distance[x][y];
322 e->animation.scale_z = 0.1;
323 e->animation.scale_y = 0.1;
324 e->animation.scale_x = 0.1;
325 e->animation.offset_x = fl::fmodf(e->move.linear[0], perlin_period);
326 e->animation.offset_y = 0;
327 e->animation.offset_z = 0;
328 e->animation.z = 0;
329 float show1 = e->render_value(e->animation);
330
    // Channel 2: same geometry with radial[1] / linear[1].
331 e->animation.angle =
332 3 * e->polar_theta[x][y] + e->move.radial[1] -
333 e->distance[x][y] / 3;
334 e->animation.dist = e->distance[x][y];
335 e->animation.offset_x = fl::fmodf(e->move.linear[1], perlin_period);
336 float show2 = e->render_value(e->animation);
337
    // Channel 3: same geometry with radial[2] / linear[2].
338 e->animation.angle =
339 3 * e->polar_theta[x][y] + e->move.radial[2] -
340 e->distance[x][y] / 3;
341 e->animation.dist = e->distance[x][y];
342 e->animation.offset_x = fl::fmodf(e->move.linear[2], perlin_period);
343 float show3 = e->render_value(e->animation);
344
    // Linear falloff with distance: (radius - dist) / radius.
345 float radius = e->radial_filter_radius;
346 float radial_filter = (radius - e->distance[x][y]) / radius;
347
    // Channel weights x3, x1/2 and x1/4 — the same factors the fixed-point
    // paths precompute as rf3, rf_half and rf_quarter.
348 e->pixel.red = 3 * show1 * radial_filter;
349 e->pixel.green = show2 * radial_filter / 2;
350 e->pixel.blue = show3 * radial_filter / 4;
351
352 e->pixel = e->rgb_sanity_check(e->pixel);
353 e->setPixelColorInternal(x, y, e->pixel);
354 }
355 }
356}
357
358// ============================================================================
359// Q31 Scalar Implementation (fixed-point, non-vectorized)
360// ============================================================================
361
// NOTE(review): the signature line is missing from this listing; the symbol
// index at the end of the page lists `void draw(Context &ctx) override` once
// for each of the three variants.
//
// Fixed-point scalar path: one loop over the SoA cache, one sincos32_simd call
// per pixel covering all three channels, then per-channel Perlin noise, radial
// filter and a write to leds[pixel_idx[i]].
363 auto setup = setupChasingSpiralFrame(ctx, mState);
364 const int total_pixels = setup.total_pixels;
365 const i32 *fade_lut = setup.fade_lut;
366 const u8 *perm = setup.perm;
367 const i32 cx_raw = setup.cx_raw;
368 const i32 cy_raw = setup.cy_raw;
369 const i32 lin0_raw = setup.lin0_raw;
370 const i32 lin1_raw = setup.lin1_raw;
371 const i32 lin2_raw = setup.lin2_raw;
372 const i32 rad0_raw = setup.rad0_raw;
373 const i32 rad1_raw = setup.rad1_raw;
374 const i32 rad2_raw = setup.rad2_raw;
    // NOTE(review): listing line 375 is missing here; `leds`, used at the end
    // of the loop, is presumably bound from `setup` on that line — confirm.
376
377 // Compute one noise channel from a batched SinCos32_simd result.
    // `lane` selects which of the batched angles to use. The return value is
    // the clamped noise scaled by 255, still in raw s16x16 form.
378 auto noise_channel = [&](const SinCos32_simd &sc, int lane,
379 i32 lin_raw, i32 dist_raw) -> i32 {
380 i32 cos_v = static_cast<i32>(simd::extract_u32_4(sc.cos_vals, lane));
381 i32 sin_v = static_cast<i32>(simd::extract_u32_4(sc.sin_vals, lane));
382 i32 nx = perlinCoord(cos_v, dist_raw, lin_raw + cx_raw);
383 i32 ny = perlinCoord(sin_v, dist_raw, cy_raw);
384 return clampAndScale255(Perlin::pnoise2d_raw(nx, ny, fade_lut, perm));
385 };
386
387 for (int i = 0; i < total_pixels; i++) {
388 const i32 base_raw = setup.base_angle[i];
389 const i32 dist_raw = setup.dist_scaled[i];
390
391 // Batch all 3 channel sincos into one SIMD call (4th lane unused)
392 simd::simd_u32x4 angles = simd::set_u32_4(
393 radiansToA24(base_raw, rad0_raw),
394 radiansToA24(base_raw, rad1_raw),
395 radiansToA24(base_raw, rad2_raw), 0);
396 SinCos32_simd sc = sincos32_simd(angles);
397
    // Lanes 0/1/2 carry the red/green/blue channel angles respectively.
398 i32 s0 = noise_channel(sc, 0, lin0_raw, dist_raw);
399 i32 s1 = noise_channel(sc, 1, lin1_raw, dist_raw);
400 i32 s2 = noise_channel(sc, 2, lin2_raw, dist_raw);
401
    // applyRadialFilter() already saturates to [0, 255], so the u8 casts
    // below do not truncate.
402 i32 r = applyRadialFilter(s0, setup.rf3[i]);
403 i32 g = applyRadialFilter(s1, setup.rf_half[i]);
404 i32 b = applyRadialFilter(s2, setup.rf_quarter[i]);
405
406 leds[setup.pixel_idx[i]] = CRGB(static_cast<u8>(r),
407 static_cast<u8>(g),
408 static_cast<u8>(b));
409 }
410}
411
412// ============================================================================
413// SIMD Implementation (vectorized 4-wide processing)
414// ============================================================================
415
// NOTE(review): the signature line is missing from this listing; the symbol
// index at the end of the page lists `void draw(Context &ctx) override` once
// for each of the three variants.
//
// Fixed-point SIMD path: handles pixels four at a time through
// simd4_processChannel(), then finishes any remaining (total_pixels % 4)
// pixels with a scalar loop that uses the same helpers as the Q31 scalar path.
417 auto setup = setupChasingSpiralFrame(ctx, mState);
418 const int total_pixels = setup.total_pixels;
419 const i32 *base_angle = setup.base_angle;
420 const i32 *dist_scaled = setup.dist_scaled;
421 const i32 *rf3_arr = setup.rf3;
422 const i32 *rf_half_arr = setup.rf_half;
423 const i32 *rf_qtr_arr = setup.rf_quarter;
424 const u16 *pixel_idx = setup.pixel_idx;
425 const i32 *fade_lut = setup.fade_lut;
426 const u8 *perm = setup.perm;
427 const i32 cx_raw = setup.cx_raw;
428 const i32 cy_raw = setup.cy_raw;
429 const i32 lin0_raw = setup.lin0_raw;
430 const i32 lin1_raw = setup.lin1_raw;
431 const i32 lin2_raw = setup.lin2_raw;
432 const i32 rad0_raw = setup.rad0_raw;
433 const i32 rad1_raw = setup.rad1_raw;
434 const i32 rad2_raw = setup.rad2_raw;
    // NOTE(review): listing line 435 is missing here; `leds`, used by
    // scatterPixel() and the tail loop, is presumably bound from `setup` on
    // that line — confirm.
436
437 // SIMD pixel pipeline: process 4 pixels per iteration
    // i only ever takes multiples of 4 here, and the loop condition keeps all
    // four lanes below total_pixels, so padding slots are never read.
438 int i = 0;
439 for (; i + 3 < total_pixels; i += 4) {
440 // Aligned SoA loads (arrays are FL_ALIGNAS(16), stride is 4)
441 auto base_vec = loadAligned(base_angle, i);
442 auto dist_vec = loadAligned(dist_scaled, i);
443 auto rf3_vec = loadAligned(rf3_arr, i);
444 auto rf_half_vec = loadAligned(rf_half_arr, i);
445 auto rf_qtr_vec = loadAligned(rf_qtr_arr, i);
446
    // One call per colour channel; only the radial/linear offsets and the
    // radial filter vector differ between the three.
447 auto r_vec = simd4_processChannel(
448 base_vec, dist_vec, rad0_raw, lin0_raw, fade_lut, perm, cx_raw, cy_raw, rf3_vec);
449 auto g_vec = simd4_processChannel(
450 base_vec, dist_vec, rad1_raw, lin1_raw, fade_lut, perm, cx_raw, cy_raw, rf_half_vec);
451 auto b_vec = simd4_processChannel(
452 base_vec, dist_vec, rad2_raw, lin2_raw, fade_lut, perm, cx_raw, cy_raw, rf_qtr_vec);
453
454 // Scatter to LED array (pixel_idx holds arbitrary xyMap-remapped indices)
455 scatterPixel(leds, pixel_idx[i+0], r_vec, g_vec, b_vec, 0);
456 scatterPixel(leds, pixel_idx[i+1], r_vec, g_vec, b_vec, 1);
457 scatterPixel(leds, pixel_idx[i+2], r_vec, g_vec, b_vec, 2);
458 scatterPixel(leds, pixel_idx[i+3], r_vec, g_vec, b_vec, 3);
459 }
460
461 // Scalar fallback for remaining pixels (when total_pixels % 4 != 0)
462 for (; i < total_pixels; i++) {
463 const i32 base_raw = base_angle[i];
464 const i32 dist_raw = dist_scaled[i];
465
    // Unlike the Q31 scalar draw, the tail uses the scalar sincos32() once
    // per channel rather than one batched sincos32_simd() call.
466 auto noise_ch = [&](i32 rad_raw, i32 lin_raw) -> i32 {
467 u32 a24 = radiansToA24(base_raw, rad_raw);
468 SinCos32 sc = sincos32(a24);
469 i32 nx = perlinCoord(sc.cos_val, dist_raw, lin_raw + cx_raw);
470 i32 ny = perlinCoord(sc.sin_val, dist_raw, cy_raw);
471 i32 raw = Perlin::pnoise2d_raw(nx, ny, fade_lut, perm);
472 return clampAndScale255(raw);
473 };
474
475 i32 s0 = noise_ch(rad0_raw, lin0_raw);
476 i32 s1 = noise_ch(rad1_raw, lin1_raw);
477 i32 s2 = noise_ch(rad2_raw, lin2_raw);
478
479 i32 r = applyRadialFilter(s0, rf3_arr[i]);
480 i32 g = applyRadialFilter(s1, rf_half_arr[i]);
481 i32 b = applyRadialFilter(s2, rf_qtr_arr[i]);
482
483 leds[pixel_idx[i]] = CRGB(static_cast<u8>(r), static_cast<u8>(g), static_cast<u8>(b));
484 }
485}
486
487} // namespace fl
488
void setup()
fl::CRGB leds[NUM_LEDS]
TestState state
fl::UISlider scale("Scale", 4,.1, 4,.1)
Alignment macros and utilities for FastLED.
void draw(Context &ctx) override
void draw(Context &ctx) override
void draw(Context &ctx) override
ChasingSpiralState mState
static constexpr int FRAC_BITS
Definition s16x16.h:22
constexpr i32 raw() const FL_NOEXCEPT
Definition s16x16.h:60
fl::UISlider offset("Offset", 0.0f, 0.0f, 1.0f, 0.01f)
FASTLED_FORCE_INLINE u32 radiansToA24(i32 base_s16x16, i32 offset_s16x16)
FASTLED_FORCE_INLINE void scatterPixel(fl::span< CRGB > leds, u16 idx, simd::simd_u32x4 r, simd::simd_u32x4 g, simd::simd_u32x4 b, int lane)
FASTLED_FORCE_INLINE i32 applyRadialFilter(i32 noise_255, i32 rf_raw)
simd::simd_u32x4 simd4_processChannel(simd::simd_u32x4 base_vec, simd::simd_u32x4 dist_vec, i32 radial_offset, i32 linear_offset, const i32 *fade_lut, const u8 *perm, i32 cx_raw, i32 cy_raw, simd::simd_u32x4 rf_vec)
FASTLED_FORCE_INLINE i32 perlinCoord(i32 sc_val, i32 dist_raw, i32 offset)
FrameSetup setupChasingSpiralFrame(Context &ctx, ChasingSpiralState &state)
FASTLED_FORCE_INLINE i32 clampAndScale255(i32 raw_s16x16)
FASTLED_FORCE_INLINE simd::simd_u32x4 loadAligned(const i32 *arr, int i)
unsigned char u8
Definition s16x16x4.h:132
T * assume_aligned(T *ptr) FL_NOEXCEPT
Definition s16x16x4.h:126
platforms::simd_u32x4 simd_u32x4
Definition types.h:26
unsigned char u8
Definition stdint.h:131
fl::CRGB CRGB
Definition video.h:15
FASTLED_FORCE_INLINE SinCos32_simd sincos32_simd(simd::simd_u32x4 angles) FL_NOEXCEPT
Process 4 angles simultaneously, returning vectorized sin/cos values SIMD-optimized: vectorized angle...
Definition sin32.h:145
static constexpr i32 FP_ONE
fl::i64 i64
Definition s16x16x4.h:222
float fmodf(float x, float y) FL_NOEXCEPT
Definition math.h:336
expected< T, E > result
Alias for expected (Rust-style naming)
Definition result.h:31
FASTLED_FORCE_INLINE SinCos32 sincos32(u32 angle) FL_NOEXCEPT
Definition sin32.h:88
Base definition for an LED controller.
Definition crgb.hpp:179
i32 sin_val
Definition sin32.h:25
i32 cos_val
Definition sin32.h:26
#define FL_OPTIMIZATION_LEVEL_O3_BEGIN
#define FASTLED_FORCE_INLINE
#define FL_FAST_MATH_BEGIN
#define FL_FAST_MATH_END
#define FL_OPTIMIZATION_LEVEL_O3_END
Umbrella header for SIMD subsystem.
fl::unique_ptr< Engine > mEngine
Definition context.h:38
static fl::simd::simd_u32x4 pnoise2d_raw_simd4_vec(fl::simd::simd_u32x4 nx_vec, fl::simd::simd_u32x4 ny_vec, const fl::i32 *fade_lut, const fl::u8 *perm)
static void init_fade_lut(fl::i32 *table)