FastLED 3.9.15
Loading...
Searching...
No Matches
AutoResearchWave8Expand.h
Go to the documentation of this file.
1
11
12#pragma once
13
15#include "fl/channels/wave8.h"
17#include "fl/stl/bit_cast.h"
18#include "fl/stl/int.h"
19
20#if defined(ARDUINO_ARCH_ESP32) || defined(ESP_PLATFORM)
21#include <Arduino.h> // micros()
22#endif
23
24namespace autoresearch {
25namespace wave8_bench {
26
28 fl::u32 iters;
29 // Expansion in isolation (the #2526 strategies side-by-side).
30 fl::u32 expand_nibble_us; // current production
31 fl::u32 expand_byte_us; // S1: byte-indexed 256x8 LUT
32 fl::u32 expand_batched_us; // S3: byte LUT, load-all-then-store-all
33 // Full per-byte-position cost (expansion + 16-lane transpose). This is
34 // what the parlio engine actually pays per byte-position * 768/frame.
37 fl::u32 sink;
38};
39
40#if defined(ARDUINO_ARCH_ESP32) || defined(ESP_PLATFORM)
41
44inline Wave8ExpandResult measureWave8Expand(int iters_in = 30000) {
45 Wave8ExpandResult result{};
46 if (iters_in < 1) {
47 iters_in = 1;
48 }
49 if (iters_in > 200000) {
50 iters_in = 200000;
51 }
52 result.iters = static_cast<fl::u32>(iters_in);
53
54 // Representative WS2812B-ish timing; absolutes don't matter for the bench.
55 fl::ChipsetTiming timing;
56 timing.T1 = 400;
57 timing.T2 = 450;
58 timing.T3 = 400;
59
60 fl::Wave8BitExpansionLut nibLut = fl::buildWave8ExpansionLUT(timing);
61 fl::Wave8ByteExpansionLut byteLut = fl::buildWave8ByteExpansionLUT(nibLut);
62
63 fl::u8 lanes[16];
64 for (int i = 0; i < 16; ++i) {
65 lanes[i] = static_cast<fl::u8>(i * 17 + 3);
66 }
67 fl::Wave8Byte out[16];
68 volatile fl::u32 sink = 0;
69
70 // Warm caches / icache.
71 for (int i = 0; i < 16; ++i) {
72 fl::detail::wave8_convert_byte_to_wave8byte(lanes[i], nibLut, &out[i]);
73 fl::detail::wave8_expand_byte(lanes[i], byteLut, &out[i]);
74 }
75 fl::u8 transposed[16 * sizeof(fl::Wave8Byte)];
76 fl::wave8Transpose_16(reinterpret_cast<const fl::u8(&)[16]>(lanes), nibLut,
77 reinterpret_cast<fl::u8(&)[16 * sizeof(fl::Wave8Byte)]>(transposed));
78 fl::wave8Transpose_16(reinterpret_cast<const fl::u8(&)[16]>(lanes), byteLut,
79 reinterpret_cast<fl::u8(&)[16 * sizeof(fl::Wave8Byte)]>(transposed));
80
81 const int iters = iters_in;
82
83 // --- Expansion only: nibble (current production path) ---
84 {
85 fl::u32 t0 = micros();
86 for (int it = 0; it < iters; ++it) {
87 lanes[0] = static_cast<fl::u8>(it);
88 lanes[8] = static_cast<fl::u8>(~it);
89 for (int i = 0; i < 16; ++i) {
90 fl::detail::wave8_convert_byte_to_wave8byte(lanes[i], nibLut, &out[i]);
91 }
92 sink ^= out[0].symbols[0].data ^ out[15].symbols[7].data;
93 }
94 result.expand_nibble_us = micros() - t0;
95 }
96
97 // --- Expansion only: byte-LUT (S1) ---
98 {
99 fl::u32 t0 = micros();
100 for (int it = 0; it < iters; ++it) {
101 lanes[0] = static_cast<fl::u8>(it);
102 lanes[8] = static_cast<fl::u8>(~it);
103 for (int i = 0; i < 16; ++i) {
104 fl::detail::wave8_expand_byte(lanes[i], byteLut, &out[i]);
105 }
106 sink ^= out[0].symbols[0].data ^ out[15].symbols[7].data;
107 }
108 result.expand_byte_us = micros() - t0;
109 }
110
111 // --- Expansion only: batched byte-LUT (S3: load-all-then-store-all) ---
112 {
113 fl::u32 t0 = micros();
114 for (int it = 0; it < iters; ++it) {
115 lanes[0] = static_cast<fl::u8>(it);
116 lanes[8] = static_cast<fl::u8>(~it);
117 fl::u32 lo[16];
118 fl::u32 hi[16];
119 for (int i = 0; i < 16; ++i) {
120 const fl::u32 *src = fl::bit_cast_ptr<const fl::u32>(&byteLut.lut[lanes[i]]);
121 lo[i] = src[0];
122 hi[i] = src[1];
123 }
124 for (int i = 0; i < 16; ++i) {
125 fl::u32 *dst = fl::bit_cast_ptr<fl::u32>(&out[i]);
126 dst[0] = lo[i];
127 dst[1] = hi[i];
128 }
129 sink ^= out[0].symbols[0].data ^ out[15].symbols[7].data;
130 }
131 result.expand_batched_us = micros() - t0;
132 }
133
134 // --- Full per-byte-position (expansion + 16-lane transpose), nibble path ---
135 {
136 fl::u32 t0 = micros();
137 for (int it = 0; it < iters; ++it) {
138 lanes[0] = static_cast<fl::u8>(it);
139 lanes[8] = static_cast<fl::u8>(~it);
140 fl::wave8Transpose_16(reinterpret_cast<const fl::u8(&)[16]>(lanes), nibLut,
141 reinterpret_cast<fl::u8(&)[16 * sizeof(fl::Wave8Byte)]>(transposed));
142 sink ^= transposed[0] ^ transposed[127];
143 }
144 result.transpose16_nibble_us = micros() - t0;
145 }
146
147 // --- Full per-byte-position (expansion + 16-lane transpose), byte-LUT path ---
148 {
149 fl::u32 t0 = micros();
150 for (int it = 0; it < iters; ++it) {
151 lanes[0] = static_cast<fl::u8>(it);
152 lanes[8] = static_cast<fl::u8>(~it);
153 fl::wave8Transpose_16(reinterpret_cast<const fl::u8(&)[16]>(lanes), byteLut,
154 reinterpret_cast<fl::u8(&)[16 * sizeof(fl::Wave8Byte)]>(transposed));
155 sink ^= transposed[0] ^ transposed[127];
156 }
157 result.transpose16_byte_us = micros() - t0;
158 }
159
160 result.sink = static_cast<fl::u32>(sink);
161 return result;
162}
163
164#else // non-ESP32
165
166inline Wave8ExpandResult measureWave8Expand(int /*iters*/ = 30000) { return {}; }
167
168#endif
169
170} // namespace wave8_bench
171} // namespace autoresearch
Centralized LED chipset timing definitions with nanosecond precision.
Wave8ExpandResult measureWave8Expand(int=30000)
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_convert_byte_to_wave8byte(u8 byte_value, const Wave8BitExpansionLut &lut, Wave8Byte *output)
Helper: Convert byte to Wave8Byte using nibble LUT (internal use only)
Definition wave8.hpp:47
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_expand_byte(u8 byte_value, const Wave8ByteExpansionLut &lut, Wave8Byte *output)
Byte-indexed expansion (#2526): one indexed 8-byte copy.
Definition wave8.hpp:69
unsigned char u8
Definition stdint.h:131
FL_OPTIMIZE_FUNCTION Wave8BitExpansionLut buildWave8ExpansionLUT(const ChipsetTiming &timing)
Build a Wave8BitExpansionLut from chipset timing data.
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16(const u8(&FL_RESTRICT_PARAM lanes)[16], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)])
Definition wave8.cpp.hpp:73
Wave8ByteExpansionLut buildWave8ByteExpansionLUT(const Wave8BitExpansionLut &nibble)
Build a byte-indexed expansion LUT (#2526) from the nibble LUT.
expected< T, E > result
Alias for expected (Rust-style naming)
Definition result.h:31
fl::u32 micros()
Universal microsecond timer - returns microseconds since system startup.
To * bit_cast_ptr(void *storage) FL_NOEXCEPT
Definition bit_cast.h:60
u32 T2
Additional high time for bit 1 (nanoseconds)
Definition led_timing.h:88
u32 T3
Low tail duration (nanoseconds)
Definition led_timing.h:89
u32 T1
High time for bit 0 (nanoseconds)
Definition led_timing.h:87
Inline implementation details for wave8 transposition.