FastLED 3.9.15
Loading...
Searching...
No Matches
AutoResearchAnimartrixBench.h
Go to the documentation of this file.
1// AutoResearchAnimartrixBench.h
2//
3// Animartrix-representative Perlin-noise benchmark: scalar float (`fl::pnoise`)
4// vs s16x16 fixed-point (`fl::perlin_i16_optimized::pnoise2d_raw`).
5//
6// Why Perlin noise: it's the workhorse of every Animartrix effect — every
7// frame, every pixel goes through one or more `pnoise` calls to drive the
8// polar-coordinate field that shapes the pattern. If a fixed-point variant
9// wins here, it wins in Animartrix.
10//
11// Why s16x16 (and not s8x8): Animartrix's fade/lerp/grad inner loops need
12// the precision of Q16.16 to match the float reference. The s8x8 Q8.8
13// type loses too many bits in the multiplications inside the gradient
14// step. This benchmark mirrors what the FastLED tree already ships
15// (`fl/fx/2d/animartrix_detail/perlin_i16_optimized.cpp.hpp`).
16
17#pragma once
18
19#include <FastLED.h>
23
24namespace autoresearch {
26
27// Volatile sink to defeat dead-code elimination on both the float and
28// the i16 outputs. Same trick the SIMD multiply benchmark uses.
29static volatile int32_t g_animartrix_bench_sink;
30
32 int64_t iterations;
33 int64_t pnoise_float_us; // fl::pnoise(x, y, 0) total wall time
34 int64_t pnoise_i16_us; // fl::perlin_i16_optimized::pnoise2d total wall time
35 // The same coordinate grid is fed to both versions so the comparison
36 // measures purely the arithmetic cost of float vs i16 fixed-point.
37};
38
39// Sweep a 2D coordinate grid the same way an Animartrix render pass
40// would (one Perlin lookup per output pixel). 16*16 = 256 pixels per
41// outer iter, matching a 16x16 panel — small enough that the compiler
42// won't unroll the world, large enough that any per-iteration fixed
43// overhead is amortized.
46 r.iterations = iters;
47
48 // Init the i16 implementation's fade lookup table once, off the
49 // benchmark clock. Function-local static so the linter's
50 // static-in-header rule stays happy and C++11's thread-safe init
51 // guarantees a single initialization across calls. The float path
52 // has no equivalent setup.
53 struct FadeLut {
54 int32_t table[257];
55 FadeLut() { fl::perlin_i16_optimized::init_fade_lut(table); }
56 };
57 static const FadeLut fade_lut_holder; // C++11 magic statics
58 const int32_t* fade_lut = fade_lut_holder.table;
59
60 constexpr int GRID = 16; // 16x16 pixel pass per iteration
61 constexpr float STEP_F = 0.05f; // Animartrix-typical pixel step in noise space
62 // Mirror in fixed-point: 0.05 in Q16.16 = 0.05 * 65536 ≈ 3276.8, which the cast below truncates to 3276
63 constexpr int32_t STEP_I = static_cast<int32_t>(0.05f * 65536.0f);
64
65 // ── Float pnoise ──────────────────────────────────────────────
66 {
67 float ax = 0.0f, ay = 0.0f;
68 int32_t sink = 0;
69 uint32_t t0 = micros();
70 for (int it = 0; it < iters; it++) {
71 // Slowly drift the origin across the iter so the compiler
72 // can't pre-compute. Same pattern Animartrix uses (the
73 // origin advances with `time_speed * dt` each frame).
74 ax += 0.011f;
75 ay += 0.013f;
76 for (int row = 0; row < GRID; row++) {
77 float y = ay + row * STEP_F;
78 for (int col = 0; col < GRID; col++) {
79 float x = ax + col * STEP_F;
80 float n = fl::pnoise(x, y, 0.0f);
81 // Map [-1, 1] → int8 the way Animartrix's output
82 // stage does. Sink to defeat DCE.
83 sink += static_cast<int32_t>(n * 127.0f);
84 }
85 }
86 }
87 uint32_t t1 = micros();
89 r.pnoise_float_us = static_cast<int64_t>(t1 - t0);
90 }
91
92 // ── i16 fixed-point pnoise2d ──────────────────────────────────
93 {
94 int32_t ax_i = 0, ay_i = 0;
95 // Q16.16 increment for the slow drift (matches 0.011, 0.013 in float)
96 constexpr int32_t DRIFT_X = static_cast<int32_t>(0.011f * 65536.0f);
97 constexpr int32_t DRIFT_Y = static_cast<int32_t>(0.013f * 65536.0f);
98 int32_t sink = 0;
99 uint32_t t0 = micros();
100 for (int it = 0; it < iters; it++) {
101 ax_i += DRIFT_X;
102 ay_i += DRIFT_Y;
103 for (int row = 0; row < GRID; row++) {
104 int32_t y_i = ay_i + row * STEP_I;
105 for (int col = 0; col < GRID; col++) {
106 int32_t x_i = ax_i + col * STEP_I;
107 // pnoise2d_raw returns i32 Q16.16. Scale toward the int8 range
108 // the float path uses: the exact divisor would be (HP_ONE / 127),
109 // which the `>> 9` below (divide by 512) only approximates.
111 x_i, y_i, fade_lut, fl::PERLIN_NOISE);
112 sink += n >> 9; // crude scale; the actual op cost,
113 // not the value, is what we measure
114 }
115 }
116 }
117 uint32_t t1 = micros();
119 r.pnoise_i16_us = static_cast<int64_t>(t1 - t0);
120 }
121
122 return r;
123}
124
125} // namespace animartrix_check
126} // namespace autoresearch
int y
Definition simple.h:93
int x
Definition simple.h:92
PerlinBenchResult runPerlinBenchmark(int iters)
static volatile int32_t g_animartrix_bench_sink
FASTLED_FORCE_INLINE float pnoise(float x, float y, float z)
static void init_fade_lut(fl::i32 *table)
static fl::i32 pnoise2d_raw(fl::i32 fx_raw, fl::i32 fy_raw, const fl::i32 *fade_lut, const fl::u8 *perm)