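Transposes one byte from each of 4 input lanes into 4 bytes of 4-way bit-interleaved output using direct bit extraction, repeating this for every byte index. Bits are emitted MSB first: each output byte carries two bit positions, with lanes 0-3 of the higher bit in output bits 0-3 and lanes 0-3 of the next lower bit in output bits 4-7. This function is ISR-safe: no allocations, no exceptions, minimal overhead. Inline functions are automatically placed where needed - no IRAM_ATTR required.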
359 {
360 for (size_t byte_idx = 0; byte_idx < num_bytes; byte_idx++) {
361 u8 a = lanes[0][byte_idx];
362 u8 b = lanes[1][byte_idx];
363 u8 c = lanes[2][byte_idx];
364 u8 d = lanes[3][byte_idx];
365
366 u8* dest = &output[byte_idx * 4];
367
368 dest[0] = ((a >> 7) & 0x01) << 0 | ((b >> 7) & 0x01) << 1 | ((c >> 7) & 0x01) << 2 | ((d >> 7) & 0x01) << 3 |
369 ((a >> 6) & 0x01) << 4 | ((b >> 6) & 0x01) << 5 | ((c >> 6) & 0x01) << 6 | ((d >> 6) & 0x01) << 7;
370
371 dest[1] = ((a >> 5) & 0x01) << 0 | ((b >> 5) & 0x01) << 1 | ((c >> 5) & 0x01) << 2 | ((d >> 5) & 0x01) << 3 |
372 ((a >> 4) & 0x01) << 4 | ((b >> 4) & 0x01) << 5 | ((c >> 4) & 0x01) << 6 | ((d >> 4) & 0x01) << 7;
373
374 dest[2] = ((a >> 3) & 0x01) << 0 | ((b >> 3) & 0x01) << 1 | ((c >> 3) & 0x01) << 2 | ((d >> 3) & 0x01) << 3 |
375 ((a >> 2) & 0x01) << 4 | ((b >> 2) & 0x01) << 5 | ((c >> 2) & 0x01) << 6 | ((d >> 2) & 0x01) << 7;
376
377 dest[3] = ((a >> 1) & 0x01) << 0 | ((b >> 1) & 0x01) << 1 | ((c >> 1) & 0x01) << 2 | ((d >> 1) & 0x01) << 3 |
378 ((a >> 0) & 0x01) << 4 | ((b >> 0) & 0x01) << 5 | ((c >> 0) & 0x01) << 6 | ((d >> 0) & 0x01) << 7;
379 }
380}