25 const Wave8BitExpansionLut &lut,
30 Wave8Byte laneWaveformSymbols[2];
42 const Wave8BitExpansionLut &lut,
45 Wave8Byte laneWaveformSymbols[4];
48 for (
int lane = 0; lane < 4; lane++) {
58 const Wave8BitExpansionLut &lut,
61 Wave8Byte laneWaveformSymbols[8];
64 for (
int lane = 0; lane < 8; lane++) {
74 const Wave8BitExpansionLut &lut,
77 Wave8Byte laneWaveformSymbols[16];
80 for (
int lane = 0; lane < 16; lane++) {
94 const Wave8ByteExpansionLut &lut,
96 Wave8Byte laneWaveformSymbols[2];
104 const Wave8ByteExpansionLut &lut,
106 Wave8Byte laneWaveformSymbols[4];
107 for (
int lane = 0; lane < 4; lane++) {
115 const Wave8ByteExpansionLut &lut,
117 Wave8Byte laneWaveformSymbols[8];
118 for (
int lane = 0; lane < 8; lane++) {
126 const Wave8ByteExpansionLut &lut,
128 Wave8Byte laneWaveformSymbols[16];
129 for (
int lane = 0; lane < 16; lane++) {
138 const Wave8ByteExpansionLut &lut,
143 Wave8Byte laneWaveformsA[16];
144 Wave8Byte laneWaveformsB[16];
145 for (
int lane = 0; lane < 16; lane++) {
148 for (
int lane = 0; lane < 16; lane++) {
160 const Wave8ByteExpansionLut &lut,
165 const u8 W0 = lut.lut[0x00].symbols[0].data;
166 const u8 W1 = lut.lut[0xFF].symbols[0].data;
172 const Wave8ByteExpansionLut &lut,
174 const u8 W0 = lut.lut[0x00].symbols[0].data;
175 const u8 W1 = lut.lut[0xFF].symbols[0].data;
181 const Wave8ByteExpansionLut &lut,
183 const u8 W0 = lut.lut[0x00].symbols[0].data;
184 const u8 W1 = lut.lut[0xFF].symbols[0].data;
190 const Wave8ByteExpansionLut &lut,
192 const u8 W0 = lut.lut[0x00].symbols[0].data;
193 const u8 W1 = lut.lut[0xFF].symbols[0].data;
202 const Wave8ByteExpansionLut &lut,
207 const u8 W0 = lut.lut[0x00].symbols[0].data;
208 const u8 W1 = lut.lut[0xFF].symbols[0].data;
211 output_a, output_b, output_c, output_d);
219 const Wave8ByteExpansionLut &lut,
224 Wave8Byte laneWaveformsA[16];
225 Wave8Byte laneWaveformsB[16];
226 Wave8Byte laneWaveformsC[16];
227 Wave8Byte laneWaveformsD[16];
228 for (
int lane = 0; lane < 16; lane++) {
231 for (
int lane = 0; lane < 16; lane++) {
234 for (
int lane = 0; lane < 16; lane++) {
237 for (
int lane = 0; lane < 16; lane++) {
241 laneWaveformsC, laneWaveformsD,
242 output_a, output_b, output_c, output_d);
251 Wave8BitExpansionLut lut;
257 const u32 t0h = timing.
T1;
258 const u32 t1h = timing.
T1 + timing.
T2;
259 const u32 period = timing.
T1 + timing.
T2 + timing.
T3;
262 const float t0h_norm =
static_cast<float>(t0h) / period;
263 const float t1h_norm =
static_cast<float>(t1h) / period;
269 static_cast<u32
>(t0h_norm * 8.0f + 0.5f);
271 static_cast<u32
>(t1h_norm * 8.0f + 0.5f);
281 u8 bit0_waveform = 0;
282 u8 bit1_waveform = 0;
285 for (u32 i = 0; i < pulses_bit0; i++) {
286 bit0_waveform |= (0x80 >> i);
290 for (u32 i = 0; i < pulses_bit1; i++) {
291 bit1_waveform |= (0x80 >> i);
295 for (
u8 nibble = 0; nibble < 16; nibble++) {
297 for (
int bit_pos = 3; bit_pos >= 0; bit_pos--) {
299 const bool bit_set = (nibble >> bit_pos) & 1;
300 const u8 waveform = bit_set ? bit1_waveform : bit0_waveform;
303 lut.lut[nibble][3 - bit_pos].data = waveform;
314 Wave8ByteExpansionLut out;
315 for (
int b = 0; b < 256; ++b) {
316 const Wave8Bit *hi = nibble.lut[(b >> 4) & 0xF];
317 const Wave8Bit *lo = nibble.lut[b & 0xF];
318 for (
int i = 0; i < 4; ++i) {
319 out.lut[b].symbols[i] = hi[i];
320 out.lut[b].symbols[i + 4] = lo[i];
337 Wave8Byte lane_waves[2];
340 for (
int symbol_idx = 0; symbol_idx < 8; symbol_idx++) {
342 u16 interleaved = ((u16)transposed[symbol_idx * 2] << 8) |
343 transposed[symbol_idx * 2 + 1];
351 for (
int bit = 0; bit < 8; bit++) {
353 if (interleaved & (1 << (bit * 2 + 1))) {
354 lane0_bits |= (1 << bit);
357 if (interleaved & (1 << (bit * 2))) {
358 lane1_bits |= (1 << bit);
362 lane_waves[0].symbols[symbol_idx].data = lane0_bits;
363 lane_waves[1].symbols[symbol_idx].data = lane1_bits;
368 fl::isr::memcpy(output +
sizeof(Wave8Byte), &lane_waves[1],
sizeof(Wave8Byte));
378 Wave8Byte lane_waves[4];
381 for (
int symbol_idx = 0; symbol_idx < 8; symbol_idx++) {
382 u8 lane_bytes[4] = {0, 0, 0, 0};
385 for (
int byte_idx = 0; byte_idx < 4; byte_idx++) {
386 u8 input_byte = transposed[symbol_idx * 4 + byte_idx];
389 int pulse_bit_hi = 7 - (byte_idx * 2);
390 int pulse_bit_lo = pulse_bit_hi - 1;
394 for (
int lane = 0; lane < 4; lane++) {
396 u8 pulse_hi = (input_byte >> (4 + lane)) & 1;
397 u8 pulse_lo = (input_byte >> lane) & 1;
400 lane_bytes[lane] |= (pulse_hi << pulse_bit_hi);
401 lane_bytes[lane] |= (pulse_lo << pulse_bit_lo);
406 for (
int lane = 0; lane < 4; lane++) {
407 lane_waves[lane].symbols[symbol_idx].data = lane_bytes[lane];
412 for (
int lane = 0; lane < 4; lane++) {
413 fl::isr::memcpy(output + lane *
sizeof(Wave8Byte), &lane_waves[lane],
sizeof(Wave8Byte));
424 Wave8Byte lane_waves[8];
427 for (
int symbol_idx = 0; symbol_idx < 8; symbol_idx++) {
428 u8 lane_bytes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
433 for (
int byte_idx = 0; byte_idx < 8; byte_idx++) {
434 u8 input_byte = transposed[symbol_idx * 8 + byte_idx];
438 int pulse_bit = 7 - byte_idx;
442 for (
int lane = 0; lane < 8; lane++) {
444 u8 pulse = (input_byte >> lane) & 1;
447 lane_bytes[lane] |= (pulse << pulse_bit);
452 for (
int lane = 0; lane < 8; lane++) {
453 lane_waves[lane].symbols[symbol_idx].data = lane_bytes[lane];
458 for (
int lane = 0; lane < 8; lane++) {
459 fl::isr::memcpy(output + lane *
sizeof(Wave8Byte), &lane_waves[lane],
sizeof(Wave8Byte));
470 Wave8Byte lane_waves[16];
473 for (
int symbol_idx = 0; symbol_idx < 8; symbol_idx++) {
474 u8 lane_bytes[16] = {0};
477 for (
int pulse_idx = 0; pulse_idx < 8; pulse_idx++) {
478 int pulse_bit = 7 - pulse_idx;
481 int input_offset = symbol_idx * 16 + pulse_idx * 2;
482 u16 input_word = (u16)transposed[input_offset] |
483 ((u16)transposed[input_offset + 1] << 8);
487 for (
int lane = 0; lane < 16; lane++) {
489 u8 pulse = (input_word >> lane) & 1;
492 lane_bytes[lane] |= (pulse << pulse_bit);
497 for (
int lane = 0; lane < 16; lane++) {
498 lane_waves[lane].symbols[symbol_idx].data = lane_bytes[lane];
503 for (
int lane = 0; lane < 16; lane++) {
504 fl::isr::memcpy(output + lane *
sizeof(Wave8Byte), &lane_waves[lane],
sizeof(Wave8Byte));
Centralized LED chipset timing definitions with nanosecond precision.
ISR-safe memory operations (inline, header-only)
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_4_bf1(const u8 lanes[4], u8 W0, u8 W1, u8 output[4 *sizeof(Wave8Byte)])
BF1 for 4-lane Wave8.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_2_bf1(const u8 lanes[2], u8 W0, u8 W1, u8 output[2 *sizeof(Wave8Byte)])
BF1 for 2-lane Wave8.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16(const Wave8Byte lane_waves[16], u8 output[16 *sizeof(Wave8Byte)])
Transpose 16 lanes of Wave8Byte data into interleaved format.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_8_bf1(const u8 lanes[8], u8 W0, u8 W1, u8 output[8 *sizeof(Wave8Byte)])
BF1 for 8-lane Wave8 — same algebraic identity as 16-lane BF1.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_convert_byte_to_wave8byte(u8 byte_value, const Wave8BitExpansionLut &lut, Wave8Byte *output)
Helper: Convert byte to Wave8Byte using nibble LUT (internal use only)
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_8(const Wave8Byte lane_waves[8], u8 output[8 *sizeof(Wave8Byte)])
Transpose 8 lanes of Wave8Byte data into interleaved format.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16x4_pipe4(const Wave8Byte lane_waves_a[16], const Wave8Byte lane_waves_b[16], const Wave8Byte lane_waves_c[16], const Wave8Byte lane_waves_d[16], u8 output_a[16 *sizeof(Wave8Byte)], u8 output_b[16 *sizeof(Wave8Byte)], u8 output_c[16 *sizeof(Wave8Byte)], u8 output_d[16 *sizeof(Wave8Byte)])
Pipe4: transpose 16-lane × 4-byte-positions in one fused call.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_2(const Wave8Byte lane_waves[2], u8 output[2 *sizeof(Wave8Byte)])
Transpose 2 lanes of Wave8Byte data into interleaved format.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_expand_byte(u8 byte_value, const Wave8ByteExpansionLut &lut, Wave8Byte *output)
Byte-indexed expansion (#2526): one indexed 8-byte copy.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16x2_pipe2(const Wave8Byte lane_waves_a[16], const Wave8Byte lane_waves_b[16], u8 output_a[16 *sizeof(Wave8Byte)], u8 output_b[16 *sizeof(Wave8Byte)])
Pipe2: transpose 16-lane × 2-byte-positions in one fused call.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_4(const Wave8Byte lane_waves[4], u8 output[4 *sizeof(Wave8Byte)])
Transpose 4 lanes of Wave8Byte data into interleaved format.
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16_bf1(const u8 lanes[16], u8 W0, u8 W1, u8 output[16 *sizeof(Wave8Byte)])
BF1: chipset-aware direct encode for Wave8 16-lane (#2548 deep-dive).
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16x4_bf1_pipe4(const u8 lanes_a[16], const u8 lanes_b[16], const u8 lanes_c[16], const u8 lanes_d[16], u8 W0, u8 W1, u8 output_a[16 *sizeof(Wave8Byte)], u8 output_b[16 *sizeof(Wave8Byte)], u8 output_c[16 *sizeof(Wave8Byte)], u8 output_d[16 *sizeof(Wave8Byte)])
BF1 + pipe4: 4-position software-pipelined BF1 (#2548 deep-dive).
FL_OPTIMIZE_FUNCTION FL_IRAM FASTLED_FORCE_INLINE void memcpy(void *FL_RESTRICT_PARAM dst, const void *FL_RESTRICT_PARAM src, size_t num_bytes)
ISR-optimized memcpy with alignment detection and switch dispatch.
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_2_bf1(const u8(&FL_RESTRICT_PARAM lanes)[2], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)])
BF1 for 2-lane Wave8 (#2548 deep-dive followup).
FL_OPTIMIZE_FUNCTION void wave8Untranspose_8(const u8(&FL_RESTRICT_PARAM transposed)[8 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION Wave8BitExpansionLut buildWave8ExpansionLUT(const ChipsetTiming &timing)
Build a Wave8BitExpansionLut from chipset timing data.
FL_OPTIMIZE_FUNCTION void wave8Untranspose_16(const u8(&FL_RESTRICT_PARAM transposed)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16_bf1(const u8(&FL_RESTRICT_PARAM lanes)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)])
BF1: chipset-aware direct encode for 16-lane Wave8 (#2548 deep-dive).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16x2_pipe2(const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)])
Pipe2: transpose 16-lane × 2-byte-positions (#2548).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16(const u8(&FL_RESTRICT_PARAM lanes)[16], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION void wave8Untranspose_4(const u8(&FL_RESTRICT_PARAM transposed)[4 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_2(const u8(&FL_RESTRICT_PARAM lanes)[2], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_8_bf1(const u8(&FL_RESTRICT_PARAM lanes)[8], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)])
BF1 for 8-lane Wave8 (#2548 deep-dive followup).
Wave8ByteExpansionLut buildWave8ByteExpansionLUT(const Wave8BitExpansionLut &nibble)
Build a byte-indexed expansion LUT (#2526) from the nibble LUT.
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16x4_pipe4(const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const u8(&FL_RESTRICT_PARAM lanes_c)[16], const u8(&FL_RESTRICT_PARAM lanes_d)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_c)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_d)[16 *sizeof(Wave8Byte)])
Pipe4: transpose 16-lane × 4-byte-positions (#2548).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_4_bf1(const u8(&FL_RESTRICT_PARAM lanes)[4], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)])
BF1 for 4-lane Wave8 (#2548 deep-dive followup).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16x4_bf1_pipe4(const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const u8(&FL_RESTRICT_PARAM lanes_c)[16], const u8(&FL_RESTRICT_PARAM lanes_d)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_c)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_d)[16 *sizeof(Wave8Byte)])
BF1 + pipe4: 4-position-pipelined direct encode (#2548 deep-dive).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_8(const u8(&FL_RESTRICT_PARAM lanes)[8], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_4(const u8(&FL_RESTRICT_PARAM lanes)[4], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION void wave8Untranspose_2(const u8(&FL_RESTRICT_PARAM transposed)[2 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)])
Base definition for an LED controller.
u32 T2
Additional high time for bit 1 (nanoseconds)
u32 T3
Low tail duration (nanoseconds)
u32 T1
High time for bit 0 (nanoseconds)
Generic chipset timing entry. Provides T1, T2, T3 timing parameters in nanoseconds for any LED protocol...
Type-safe container for packed 8-bit wave pulse pattern.
#define FL_OPTIMIZATION_LEVEL_O3_BEGIN
#define FL_OPTIMIZATION_LEVEL_O3_END
#define FL_OPTIMIZE_FUNCTION
#define FL_RESTRICT_PARAM
Inline implementation details for wave8 transposition.