FastLED 3.9.15
Loading...
Searching...
No Matches
wave8.cpp.hpp
Go to the documentation of this file.
1
9
10#include "fl/channels/wave8.h"
13#include "fl/stl/isr/memcpy.h"
14
16
17namespace fl {
18
19// ============================================================================
20// Public Transposition Functions
21// ============================================================================
22
24void wave8Transpose_2(const u8 (&FL_RESTRICT_PARAM lanes)[2],
25 const Wave8BitExpansionLut &lut,
26 u8 (&FL_RESTRICT_PARAM output)[2 * sizeof(Wave8Byte)]) {
27 // Allocate waveform buffers on stack (16 Wave8Bit total: 8 packed bytes per lane × 2 lanes)
28 // Each Wave8Byte is 8 bytes (8 Wave8Bit × 1 byte each)
29 // Layout: [Lane0_bit7, Lane0_bit6, ..., Lane0_bit0, Lane1_bit7, Lane1_bit6, ..., Lane1_bit0]
30 Wave8Byte laneWaveformSymbols[2];
31
32 // Convert each lane byte to wave pulse symbols (8 packed bytes each)
33 detail::wave8_convert_byte_to_wave8byte(lanes[0], lut, &laneWaveformSymbols[0]);
34 detail::wave8_convert_byte_to_wave8byte(lanes[1], lut, &laneWaveformSymbols[1]);
35
36 // Transpose waveforms to DMA format (interleave 8 packed bytes to 16 bytes)
37 detail::wave8_transpose_2(laneWaveformSymbols, output);
38}
39
41void wave8Transpose_4(const u8 (&FL_RESTRICT_PARAM lanes)[4],
42 const Wave8BitExpansionLut &lut,
43 u8 (&FL_RESTRICT_PARAM output)[4 * sizeof(Wave8Byte)]) {
44 // Allocate waveform buffers on stack (32 Wave8Bit total: 8 packed bytes per lane × 4 lanes)
45 Wave8Byte laneWaveformSymbols[4];
46
47 // Convert each lane byte to wave pulse symbols (8 packed bytes each)
48 for (int lane = 0; lane < 4; lane++) {
49 detail::wave8_convert_byte_to_wave8byte(lanes[lane], lut, &laneWaveformSymbols[lane]);
50 }
51
52 // Transpose waveforms to DMA format (interleave 32 packed bytes to 32 bytes)
53 detail::wave8_transpose_4(laneWaveformSymbols, output);
54}
55
57void wave8Transpose_8(const u8 (&FL_RESTRICT_PARAM lanes)[8],
58 const Wave8BitExpansionLut &lut,
59 u8 (&FL_RESTRICT_PARAM output)[8 * sizeof(Wave8Byte)]) {
60 // Allocate waveform buffers on stack (64 Wave8Bit total: 8 packed bytes per lane × 8 lanes)
61 Wave8Byte laneWaveformSymbols[8];
62
63 // Convert each lane byte to wave pulse symbols (8 packed bytes each)
64 for (int lane = 0; lane < 8; lane++) {
65 detail::wave8_convert_byte_to_wave8byte(lanes[lane], lut, &laneWaveformSymbols[lane]);
66 }
67
68 // Transpose waveforms to DMA format (interleave 64 packed bytes to 64 bytes)
69 detail::wave8_transpose_8(laneWaveformSymbols, output);
70}
71
73void wave8Transpose_16(const u8 (&FL_RESTRICT_PARAM lanes)[16],
74 const Wave8BitExpansionLut &lut,
75 u8 (&FL_RESTRICT_PARAM output)[16 * sizeof(Wave8Byte)]) {
76 // Allocate waveform buffers on stack (128 Wave8Bit total: 8 packed bytes per lane × 16 lanes)
77 Wave8Byte laneWaveformSymbols[16];
78
79 // Convert each lane byte to wave pulse symbols (8 packed bytes each)
80 for (int lane = 0; lane < 16; lane++) {
81 detail::wave8_convert_byte_to_wave8byte(lanes[lane], lut, &laneWaveformSymbols[lane]);
82 }
83
84 // Transpose waveforms to DMA format (interleave 128 packed bytes to 128 bytes)
85 detail::wave8_transpose_16(laneWaveformSymbols, output);
86}
87
88// ============================================================================
89// Byte-LUT overloads (#2526): cheaper expansion, same DMA output.
90// ============================================================================
91
93void wave8Transpose_2(const u8 (&FL_RESTRICT_PARAM lanes)[2],
94 const Wave8ByteExpansionLut &lut,
95 u8 (&FL_RESTRICT_PARAM output)[2 * sizeof(Wave8Byte)]) {
96 Wave8Byte laneWaveformSymbols[2];
97 detail::wave8_expand_byte(lanes[0], lut, &laneWaveformSymbols[0]);
98 detail::wave8_expand_byte(lanes[1], lut, &laneWaveformSymbols[1]);
99 detail::wave8_transpose_2(laneWaveformSymbols, output);
100}
101
103void wave8Transpose_4(const u8 (&FL_RESTRICT_PARAM lanes)[4],
104 const Wave8ByteExpansionLut &lut,
105 u8 (&FL_RESTRICT_PARAM output)[4 * sizeof(Wave8Byte)]) {
106 Wave8Byte laneWaveformSymbols[4];
107 for (int lane = 0; lane < 4; lane++) {
108 detail::wave8_expand_byte(lanes[lane], lut, &laneWaveformSymbols[lane]);
109 }
110 detail::wave8_transpose_4(laneWaveformSymbols, output);
111}
112
114void wave8Transpose_8(const u8 (&FL_RESTRICT_PARAM lanes)[8],
115 const Wave8ByteExpansionLut &lut,
116 u8 (&FL_RESTRICT_PARAM output)[8 * sizeof(Wave8Byte)]) {
117 Wave8Byte laneWaveformSymbols[8];
118 for (int lane = 0; lane < 8; lane++) {
119 detail::wave8_expand_byte(lanes[lane], lut, &laneWaveformSymbols[lane]);
120 }
121 detail::wave8_transpose_8(laneWaveformSymbols, output);
122}
123
125void wave8Transpose_16(const u8 (&FL_RESTRICT_PARAM lanes)[16],
126 const Wave8ByteExpansionLut &lut,
127 u8 (&FL_RESTRICT_PARAM output)[16 * sizeof(Wave8Byte)]) {
128 Wave8Byte laneWaveformSymbols[16];
129 for (int lane = 0; lane < 16; lane++) {
130 detail::wave8_expand_byte(lanes[lane], lut, &laneWaveformSymbols[lane]);
131 }
132 detail::wave8_transpose_16(laneWaveformSymbols, output);
133}
134
137 const u8 (&FL_RESTRICT_PARAM lanes_b)[16],
138 const Wave8ByteExpansionLut &lut,
139 u8 (&FL_RESTRICT_PARAM output_a)[16 * sizeof(Wave8Byte)],
140 u8 (&FL_RESTRICT_PARAM output_b)[16 * sizeof(Wave8Byte)]) {
141 // Expand both positions independently — compiler can interleave the two
142 // loops freely because they share no data.
143 Wave8Byte laneWaveformsA[16];
144 Wave8Byte laneWaveformsB[16];
145 for (int lane = 0; lane < 16; lane++) {
146 detail::wave8_expand_byte(lanes_a[lane], lut, &laneWaveformsA[lane]);
147 }
148 for (int lane = 0; lane < 16; lane++) {
149 detail::wave8_expand_byte(lanes_b[lane], lut, &laneWaveformsB[lane]);
150 }
151 // Symbol-major loop with both transposes inlined back-to-back: the two
152 // OR-trees share no dependencies, so the compiler can interleave them and
153 // fill the in-order pipeline bubbles. See #2548.
154 detail::wave8_transpose_16x2_pipe2(laneWaveformsA, laneWaveformsB,
155 output_a, output_b);
156}
157
160 const Wave8ByteExpansionLut &lut,
161 u8 (&FL_RESTRICT_PARAM output)[16 * sizeof(Wave8Byte)]) {
162 // Extract W0/W1 chipset constants from the lut.
163 // byte_lut[0x00].symbols[0] = waveform for input bit 7 == 0 = W0
164 // byte_lut[0xFF].symbols[0] = waveform for input bit 7 == 1 = W1
165 const u8 W0 = lut.lut[0x00].symbols[0].data;
166 const u8 W1 = lut.lut[0xFF].symbols[0].data;
167 detail::wave8_transpose_16_bf1(lanes, W0, W1, output);
168}
169
172 const Wave8ByteExpansionLut &lut,
173 u8 (&FL_RESTRICT_PARAM output)[8 * sizeof(Wave8Byte)]) {
174 const u8 W0 = lut.lut[0x00].symbols[0].data;
175 const u8 W1 = lut.lut[0xFF].symbols[0].data;
176 detail::wave8_transpose_8_bf1(lanes, W0, W1, output);
177}
178
181 const Wave8ByteExpansionLut &lut,
182 u8 (&FL_RESTRICT_PARAM output)[4 * sizeof(Wave8Byte)]) {
183 const u8 W0 = lut.lut[0x00].symbols[0].data;
184 const u8 W1 = lut.lut[0xFF].symbols[0].data;
185 detail::wave8_transpose_4_bf1(lanes, W0, W1, output);
186}
187
190 const Wave8ByteExpansionLut &lut,
191 u8 (&FL_RESTRICT_PARAM output)[2 * sizeof(Wave8Byte)]) {
192 const u8 W0 = lut.lut[0x00].symbols[0].data;
193 const u8 W1 = lut.lut[0xFF].symbols[0].data;
194 detail::wave8_transpose_2_bf1(lanes, W0, W1, output);
195}
196
199 const u8 (&FL_RESTRICT_PARAM lanes_b)[16],
200 const u8 (&FL_RESTRICT_PARAM lanes_c)[16],
201 const u8 (&FL_RESTRICT_PARAM lanes_d)[16],
202 const Wave8ByteExpansionLut &lut,
203 u8 (&FL_RESTRICT_PARAM output_a)[16 * sizeof(Wave8Byte)],
204 u8 (&FL_RESTRICT_PARAM output_b)[16 * sizeof(Wave8Byte)],
205 u8 (&FL_RESTRICT_PARAM output_c)[16 * sizeof(Wave8Byte)],
206 u8 (&FL_RESTRICT_PARAM output_d)[16 * sizeof(Wave8Byte)]) {
207 const u8 W0 = lut.lut[0x00].symbols[0].data;
208 const u8 W1 = lut.lut[0xFF].symbols[0].data;
209 detail::wave8_transpose_16x4_bf1_pipe4(lanes_a, lanes_b, lanes_c, lanes_d,
210 W0, W1,
211 output_a, output_b, output_c, output_d);
212}
213
216 const u8 (&FL_RESTRICT_PARAM lanes_b)[16],
217 const u8 (&FL_RESTRICT_PARAM lanes_c)[16],
218 const u8 (&FL_RESTRICT_PARAM lanes_d)[16],
219 const Wave8ByteExpansionLut &lut,
220 u8 (&FL_RESTRICT_PARAM output_a)[16 * sizeof(Wave8Byte)],
221 u8 (&FL_RESTRICT_PARAM output_b)[16 * sizeof(Wave8Byte)],
222 u8 (&FL_RESTRICT_PARAM output_c)[16 * sizeof(Wave8Byte)],
223 u8 (&FL_RESTRICT_PARAM output_d)[16 * sizeof(Wave8Byte)]) {
224 Wave8Byte laneWaveformsA[16];
225 Wave8Byte laneWaveformsB[16];
226 Wave8Byte laneWaveformsC[16];
227 Wave8Byte laneWaveformsD[16];
228 for (int lane = 0; lane < 16; lane++) {
229 detail::wave8_expand_byte(lanes_a[lane], lut, &laneWaveformsA[lane]);
230 }
231 for (int lane = 0; lane < 16; lane++) {
232 detail::wave8_expand_byte(lanes_b[lane], lut, &laneWaveformsB[lane]);
233 }
234 for (int lane = 0; lane < 16; lane++) {
235 detail::wave8_expand_byte(lanes_c[lane], lut, &laneWaveformsC[lane]);
236 }
237 for (int lane = 0; lane < 16; lane++) {
238 detail::wave8_expand_byte(lanes_d[lane], lut, &laneWaveformsD[lane]);
239 }
240 detail::wave8_transpose_16x4_pipe4(laneWaveformsA, laneWaveformsB,
241 laneWaveformsC, laneWaveformsD,
242 output_a, output_b, output_c, output_d);
243}
244
245// ============================================================================
246// LUT Builder from Timing Data
247// Note: This is not designed to be called from ISR handlers.
248// ============================================================================
250Wave8BitExpansionLut buildWave8ExpansionLUT(const ChipsetTiming &timing) {
251 Wave8BitExpansionLut lut;
252
253 // Step 1: Calculate absolute times from ChipsetTiming format
254 // ChipsetTiming.T1 = T0H (high time for bit 0)
255 // ChipsetTiming.T2 = T1H - T0H (ADDITIONAL high time for bit 1)
256 // ChipsetTiming.T3 = T0L (low tail duration)
257 const u32 t0h = timing.T1; // T0H: bit 0 goes LOW here
258 const u32 t1h = timing.T1 + timing.T2; // T1H: bit 1 goes LOW here
259 const u32 period = timing.T1 + timing.T2 + timing.T3; // Total period
260
261 // Step 2: Normalize absolute times
262 const float t0h_norm = static_cast<float>(t0h) / period;
263 const float t1h_norm = static_cast<float>(t1h) / period;
264
265 // Step 3: Convert to pulse counts (fixed 8 pulses per bit)
266 // pulses_bit0: number of HIGH pulses for bit 0 (before it goes LOW at t0h)
267 // pulses_bit1: number of HIGH pulses for bit 1 (before it goes LOW at t1h)
268 u32 pulses_bit0 =
269 static_cast<u32>(t0h_norm * 8.0f + 0.5f); // round
270 u32 pulses_bit1 =
271 static_cast<u32>(t1h_norm * 8.0f + 0.5f); // round
272
273 // Clamp to valid range [0, 8]
274 if (pulses_bit0 > 8)
275 pulses_bit0 = 8;
276 if (pulses_bit1 > 8)
277 pulses_bit1 = 8;
278
279 // Step 4: Generate bit0 and bit1 waveforms (1 byte each, packed format)
280 // Each bit represents one pulse (MSB = first pulse)
281 u8 bit0_waveform = 0;
282 u8 bit1_waveform = 0;
283
284 // Bit 0: Set MSB bits for HIGH pulses
285 for (u32 i = 0; i < pulses_bit0; i++) {
286 bit0_waveform |= (0x80 >> i); // Set bit from MSB
287 }
288
289 // Bit 1: Set MSB bits for HIGH pulses
290 for (u32 i = 0; i < pulses_bit1; i++) {
291 bit1_waveform |= (0x80 >> i); // Set bit from MSB
292 }
293
294 // Step 5: Build LUT for all 16 nibbles
295 for (u8 nibble = 0; nibble < 16; nibble++) {
296 // For each nibble, generate 4 Wave8Bit (one per bit, MSB first)
297 for (int bit_pos = 3; bit_pos >= 0; bit_pos--) {
298 // Extract bit (MSB first: bit 3, 2, 1, 0)
299 const bool bit_set = (nibble >> bit_pos) & 1;
300 const u8 waveform = bit_set ? bit1_waveform : bit0_waveform;
301
302 // Store packed waveform to LUT entry
303 lut.lut[nibble][3 - bit_pos].data = waveform;
304 }
305 }
306
307 return lut;
308}
309
310// Byte-indexed expansion LUT (#2526). Entry b = high-nibble expansion in
311// symbols[0..3] + low-nibble expansion in symbols[4..7], matching
312// wave8_convert_byte_to_wave8byte() exactly (so the byte path is bit-identical).
313Wave8ByteExpansionLut buildWave8ByteExpansionLUT(const Wave8BitExpansionLut &nibble) {
314 Wave8ByteExpansionLut out;
315 for (int b = 0; b < 256; ++b) {
316 const Wave8Bit *hi = nibble.lut[(b >> 4) & 0xF];
317 const Wave8Bit *lo = nibble.lut[b & 0xF];
318 for (int i = 0; i < 4; ++i) {
319 out.lut[b].symbols[i] = hi[i];
320 out.lut[b].symbols[i + 4] = lo[i];
321 }
322 }
323 return out;
324}
325
326// ============================================================================
327// Untranspose Functions (Testing Only - Not Optimized)
328// ============================================================================
329
331void wave8Untranspose_2(const u8 (&FL_RESTRICT_PARAM transposed)[2 * sizeof(Wave8Byte)],
332 u8 (&FL_RESTRICT_PARAM output)[2 * sizeof(Wave8Byte)]) {
333 // Reverse the 2-lane transposition
334 // Input: 16 bytes of interleaved data (2 bytes per symbol, 8 symbols)
335 // Output: 2 Wave8Byte structures (16 bytes total, de-interleaved)
336
337 Wave8Byte lane_waves[2];
338
339 // Process each of the 8 symbols
340 for (int symbol_idx = 0; symbol_idx < 8; symbol_idx++) {
341 // Read the 2 interleaved bytes for this symbol
342 u16 interleaved = ((u16)transposed[symbol_idx * 2] << 8) |
343 transposed[symbol_idx * 2 + 1];
344
345 // De-interleave bits back to lanes
346 u8 lane0_bits = 0;
347 u8 lane1_bits = 0;
348
349 // Extract bits: interleaved format has alternating bits [L0, L1, L0, L1, ...]
350 // Bits are ordered: [L1_b7, L0_b7, L1_b6, L0_b6, L1_b5, L0_b5, L1_b4, L0_b4, ...]
351 for (int bit = 0; bit < 8; bit++) {
352 // Lane 0 bits are at odd positions (shifted left by 1)
353 if (interleaved & (1 << (bit * 2 + 1))) {
354 lane0_bits |= (1 << bit);
355 }
356 // Lane 1 bits are at even positions
357 if (interleaved & (1 << (bit * 2))) {
358 lane1_bits |= (1 << bit);
359 }
360 }
361
362 lane_waves[0].symbols[symbol_idx].data = lane0_bits;
363 lane_waves[1].symbols[symbol_idx].data = lane1_bits;
364 }
365
366 // Copy de-interleaved data to output
367 fl::isr::memcpy(output, &lane_waves[0], sizeof(Wave8Byte));
368 fl::isr::memcpy(output + sizeof(Wave8Byte), &lane_waves[1], sizeof(Wave8Byte));
369}
370
372void wave8Untranspose_4(const u8 (&FL_RESTRICT_PARAM transposed)[4 * sizeof(Wave8Byte)],
373 u8 (&FL_RESTRICT_PARAM output)[4 * sizeof(Wave8Byte)]) {
374 // Reverse the 4-lane transposition
375 // Input: 32 bytes of interleaved data (4 bytes per symbol, 8 symbols)
376 // Output: 4 Wave8Byte structures (32 bytes total, de-interleaved)
377
378 Wave8Byte lane_waves[4];
379
380 // Process each of the 8 symbols
381 for (int symbol_idx = 0; symbol_idx < 8; symbol_idx++) {
382 u8 lane_bytes[4] = {0, 0, 0, 0};
383
384 // Process 4 input bytes (2 pulses per byte)
385 for (int byte_idx = 0; byte_idx < 4; byte_idx++) {
386 u8 input_byte = transposed[symbol_idx * 4 + byte_idx];
387
388 // Calculate which pulse bits these correspond to
389 int pulse_bit_hi = 7 - (byte_idx * 2);
390 int pulse_bit_lo = pulse_bit_hi - 1;
391
392 // De-interleave 4 lanes from this byte
393 // Bit layout: [L3_hi, L2_hi, L1_hi, L0_hi, L3_lo, L2_lo, L1_lo, L0_lo]
394 for (int lane = 0; lane < 4; lane++) {
395 // Extract bits for this lane
396 u8 pulse_hi = (input_byte >> (4 + lane)) & 1;
397 u8 pulse_lo = (input_byte >> lane) & 1;
398
399 // Reconstruct lane byte
400 lane_bytes[lane] |= (pulse_hi << pulse_bit_hi);
401 lane_bytes[lane] |= (pulse_lo << pulse_bit_lo);
402 }
403 }
404
405 // Store de-interleaved bytes
406 for (int lane = 0; lane < 4; lane++) {
407 lane_waves[lane].symbols[symbol_idx].data = lane_bytes[lane];
408 }
409 }
410
411 // Copy de-interleaved data to output
412 for (int lane = 0; lane < 4; lane++) {
413 fl::isr::memcpy(output + lane * sizeof(Wave8Byte), &lane_waves[lane], sizeof(Wave8Byte));
414 }
415}
416
418void wave8Untranspose_8(const u8 (&FL_RESTRICT_PARAM transposed)[8 * sizeof(Wave8Byte)],
419 u8 (&FL_RESTRICT_PARAM output)[8 * sizeof(Wave8Byte)]) {
420 // Reverse the 8-lane transposition
421 // Input: 64 bytes of interleaved data (8 bytes per symbol, 8 symbols)
422 // Output: 8 Wave8Byte structures (64 bytes total, de-interleaved)
423
424 Wave8Byte lane_waves[8];
425
426 // Process each of the 8 symbols
427 for (int symbol_idx = 0; symbol_idx < 8; symbol_idx++) {
428 u8 lane_bytes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
429
430 // Process 8 input bytes (1 pulse per byte)
431 // After transpose+reversal: byte 0 = bit 7 (first pulse, MSB),
432 // byte 7 = bit 0 (last pulse, LSB)
433 for (int byte_idx = 0; byte_idx < 8; byte_idx++) {
434 u8 input_byte = transposed[symbol_idx * 8 + byte_idx];
435
436 // Calculate which pulse bit this corresponds to
437 // byte 0 = bit 7, byte 7 = bit 0 (reversed order)
438 int pulse_bit = 7 - byte_idx;
439
440 // De-interleave 8 lanes from this byte
441 // Bit layout: [L7, L6, L5, L4, L3, L2, L1, L0]
442 for (int lane = 0; lane < 8; lane++) {
443 // Extract bit for this lane (lane 0 = LSB, lane 7 = MSB)
444 u8 pulse = (input_byte >> lane) & 1;
445
446 // Reconstruct lane byte
447 lane_bytes[lane] |= (pulse << pulse_bit);
448 }
449 }
450
451 // Store de-interleaved bytes
452 for (int lane = 0; lane < 8; lane++) {
453 lane_waves[lane].symbols[symbol_idx].data = lane_bytes[lane];
454 }
455 }
456
457 // Copy de-interleaved data to output
458 for (int lane = 0; lane < 8; lane++) {
459 fl::isr::memcpy(output + lane * sizeof(Wave8Byte), &lane_waves[lane], sizeof(Wave8Byte));
460 }
461}
462
464void wave8Untranspose_16(const u8 (&FL_RESTRICT_PARAM transposed)[16 * sizeof(Wave8Byte)],
465 u8 (&FL_RESTRICT_PARAM output)[16 * sizeof(Wave8Byte)]) {
466 // Reverse the 16-lane transposition
467 // Input: 128 bytes of interleaved data (16 bytes per symbol, 8 symbols)
468 // Output: 16 Wave8Byte structures (128 bytes total, de-interleaved)
469
470 Wave8Byte lane_waves[16];
471
472 // Process each of the 8 symbols
473 for (int symbol_idx = 0; symbol_idx < 8; symbol_idx++) {
474 u8 lane_bytes[16] = {0};
475
476 // Process 8 pulses (16 bytes total: 2 bytes per pulse)
477 for (int pulse_idx = 0; pulse_idx < 8; pulse_idx++) {
478 int pulse_bit = 7 - pulse_idx;
479
480 // Read 16-bit word for this pulse
481 int input_offset = symbol_idx * 16 + pulse_idx * 2;
482 u16 input_word = (u16)transposed[input_offset] |
483 ((u16)transposed[input_offset + 1] << 8);
484
485 // De-interleave 16 lanes from this word
486 // Bit layout: [L15, L14, L13, ..., L1, L0]
487 for (int lane = 0; lane < 16; lane++) {
488 // Extract bit for this lane (lane 0 = LSB, lane 15 = MSB)
489 u8 pulse = (input_word >> lane) & 1;
490
491 // Reconstruct lane byte
492 lane_bytes[lane] |= (pulse << pulse_bit);
493 }
494 }
495
496 // Store de-interleaved bytes
497 for (int lane = 0; lane < 16; lane++) {
498 lane_waves[lane].symbols[symbol_idx].data = lane_bytes[lane];
499 }
500 }
501
502 // Copy de-interleaved data to output
503 for (int lane = 0; lane < 16; lane++) {
504 fl::isr::memcpy(output + lane * sizeof(Wave8Byte), &lane_waves[lane], sizeof(Wave8Byte));
505 }
506}
507
508} // namespace fl
509
Centralized LED chipset timing definitions with nanosecond precision.
ISR-safe memory operations (inline, header-only)
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_4_bf1(const u8 lanes[4], u8 W0, u8 W1, u8 output[4 *sizeof(Wave8Byte)])
BF1 for 4-lane Wave8.
Definition wave8.hpp:370
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_2_bf1(const u8 lanes[2], u8 W0, u8 W1, u8 output[2 *sizeof(Wave8Byte)])
BF1 for 2-lane Wave8.
Definition wave8.hpp:417
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16(const Wave8Byte lane_waves[16], u8 output[16 *sizeof(Wave8Byte)])
Transpose 16 lanes of Wave8Byte data into interleaved format.
Definition wave8.hpp:231
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_8_bf1(const u8 lanes[8], u8 W0, u8 W1, u8 output[8 *sizeof(Wave8Byte)])
BF1 for 8-lane Wave8 — same algebraic identity as 16-lane BF1.
Definition wave8.hpp:345
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_convert_byte_to_wave8byte(u8 byte_value, const Wave8BitExpansionLut &lut, Wave8Byte *output)
Helper: Convert byte to Wave8Byte using nibble LUT (internal use only)
Definition wave8.hpp:47
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_8(const Wave8Byte lane_waves[8], u8 output[8 *sizeof(Wave8Byte)])
Transpose 8 lanes of Wave8Byte data into interleaved format.
Definition wave8.hpp:205
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16x4_pipe4(const Wave8Byte lane_waves_a[16], const Wave8Byte lane_waves_b[16], const Wave8Byte lane_waves_c[16], const Wave8Byte lane_waves_d[16], u8 output_a[16 *sizeof(Wave8Byte)], u8 output_b[16 *sizeof(Wave8Byte)], u8 output_c[16 *sizeof(Wave8Byte)], u8 output_d[16 *sizeof(Wave8Byte)])
Pipe4: transpose 16-lane × 4-byte-positions in one fused call.
Definition wave8.hpp:273
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_2(const Wave8Byte lane_waves[2], u8 output[2 *sizeof(Wave8Byte)])
Transpose 2 lanes of Wave8Byte data into interleaved format.
Definition wave8.hpp:103
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_expand_byte(u8 byte_value, const Wave8ByteExpansionLut &lut, Wave8Byte *output)
Byte-indexed expansion (#2526): one indexed 8-byte copy.
Definition wave8.hpp:69
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16x2_pipe2(const Wave8Byte lane_waves_a[16], const Wave8Byte lane_waves_b[16], u8 output_a[16 *sizeof(Wave8Byte)], u8 output_b[16 *sizeof(Wave8Byte)])
Pipe2: transpose 16-lane × 2-byte-positions in one fused call.
Definition wave8.hpp:249
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_4(const Wave8Byte lane_waves[4], u8 output[4 *sizeof(Wave8Byte)])
Transpose 4 lanes of Wave8Byte data into interleaved format.
Definition wave8.hpp:126
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16_bf1(const u8 lanes[16], u8 W0, u8 W1, u8 output[16 *sizeof(Wave8Byte)])
BF1: chipset-aware direct encode for Wave8 16-lane (#2548 deep-dive).
Definition wave8.hpp:315
FASTLED_FORCE_INLINE FL_IRAM FL_OPTIMIZE_FUNCTION void wave8_transpose_16x4_bf1_pipe4(const u8 lanes_a[16], const u8 lanes_b[16], const u8 lanes_c[16], const u8 lanes_d[16], u8 W0, u8 W1, u8 output_a[16 *sizeof(Wave8Byte)], u8 output_b[16 *sizeof(Wave8Byte)], u8 output_c[16 *sizeof(Wave8Byte)], u8 output_d[16 *sizeof(Wave8Byte)])
BF1 + pipe4: 4-position software-pipelined BF1 (#2548 deep-dive).
Definition wave8.hpp:464
FL_OPTIMIZE_FUNCTION FL_IRAM FASTLED_FORCE_INLINE void memcpy(void *FL_RESTRICT_PARAM dst, const void *FL_RESTRICT_PARAM src, size_t num_bytes)
ISR-optimized memcpy with alignment detection and switch dispatch.
Definition memcpy.h:75
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_2_bf1(const u8(&FL_RESTRICT_PARAM lanes)[2], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)])
BF1 for 2-lane Wave8 (#2548 deep-dive followup).
unsigned char u8
Definition stdint.h:131
FL_OPTIMIZE_FUNCTION void wave8Untranspose_8(const u8(&FL_RESTRICT_PARAM transposed)[8 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION Wave8BitExpansionLut buildWave8ExpansionLUT(const ChipsetTiming &timing)
Build a Wave8BitExpansionLut from chipset timing data.
FL_OPTIMIZE_FUNCTION void wave8Untranspose_16(const u8(&FL_RESTRICT_PARAM transposed)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16_bf1(const u8(&FL_RESTRICT_PARAM lanes)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)])
BF1: chipset-aware direct encode for 16-lane Wave8 (#2548 deep-dive).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16x2_pipe2(const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)])
Pipe2: transpose 16-lane × 2-byte-positions (#2548).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16(const u8(&FL_RESTRICT_PARAM lanes)[16], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)])
Definition wave8.cpp.hpp:73
FL_OPTIMIZE_FUNCTION void wave8Untranspose_4(const u8(&FL_RESTRICT_PARAM transposed)[4 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)])
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_2(const u8(&FL_RESTRICT_PARAM lanes)[2], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)])
Definition wave8.cpp.hpp:24
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_8_bf1(const u8(&FL_RESTRICT_PARAM lanes)[8], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)])
BF1 for 8-lane Wave8 (#2548 deep-dive followup).
Wave8ByteExpansionLut buildWave8ByteExpansionLUT(const Wave8BitExpansionLut &nibble)
Build a byte-indexed expansion LUT (#2526) from the nibble LUT.
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16x4_pipe4(const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const u8(&FL_RESTRICT_PARAM lanes_c)[16], const u8(&FL_RESTRICT_PARAM lanes_d)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_c)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_d)[16 *sizeof(Wave8Byte)])
Pipe4: transpose 16-lane × 4-byte-positions (#2548).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_4_bf1(const u8(&FL_RESTRICT_PARAM lanes)[4], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)])
BF1 for 4-lane Wave8 (#2548 deep-dive followup).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_16x4_bf1_pipe4(const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const u8(&FL_RESTRICT_PARAM lanes_c)[16], const u8(&FL_RESTRICT_PARAM lanes_d)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_c)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_d)[16 *sizeof(Wave8Byte)])
BF1 + pipe4: 4-position-pipelined direct encode (#2548 deep-dive).
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_8(const u8(&FL_RESTRICT_PARAM lanes)[8], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)])
Definition wave8.cpp.hpp:57
FL_OPTIMIZE_FUNCTION FL_IRAM void wave8Transpose_4(const u8(&FL_RESTRICT_PARAM lanes)[4], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)])
Definition wave8.cpp.hpp:41
FL_OPTIMIZE_FUNCTION void wave8Untranspose_2(const u8(&FL_RESTRICT_PARAM transposed)[2 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)])
@ W1
White is second.
Definition eorder.h:26
@ W0
White is first.
Definition eorder.h:27
Base definition for an LED controller.
Definition crgb.hpp:179
u32 T2
Additional high time for bit 1 (nanoseconds)
Definition led_timing.h:88
u32 T3
Low tail duration (nanoseconds)
Definition led_timing.h:89
u32 T1
High time for bit 0 (nanoseconds)
Definition led_timing.h:87
Generic chipset timing entry Provides T1, T2, T3 timing parameters in nanoseconds for any LED protoco...
Definition led_timing.h:86
Type-safe container for packed 8-bit wave pulse pattern.
Definition wave8.h:22
#define FL_OPTIMIZATION_LEVEL_O3_BEGIN
#define FL_OPTIMIZATION_LEVEL_O3_END
#define FL_OPTIMIZE_FUNCTION
#define FL_IRAM
#define FL_RESTRICT_PARAM
Inline implementation details for wave8 transposition.