|
| Wave8ByteExpansionLut | fl::buildWave8ByteExpansionLUT (const Wave8BitExpansionLut &nibble) |
| | Build a byte-indexed expansion LUT from the nibble LUT (#2526).
|
| |
| FL_OPTIMIZE_FUNCTION Wave8BitExpansionLut | fl::buildWave8ExpansionLUT (const ChipsetTiming &timing) |
| | Build a Wave8BitExpansionLut from chipset timing data.
|
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_16 (const u8(&FL_RESTRICT_PARAM lanes)[16], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_16 (const u8(&FL_RESTRICT_PARAM lanes)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_16_bf1 (const u8(&FL_RESTRICT_PARAM lanes)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)]) |
| | BF1: chipset-aware direct encode for 16-lane Wave8 (#2548 deep-dive).
|
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_16x2_pipe2 (const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)]) |
| | Pipe2: transpose 16 lanes × 2 byte positions (#2548).
|
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_16x4_bf1_pipe4 (const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const u8(&FL_RESTRICT_PARAM lanes_c)[16], const u8(&FL_RESTRICT_PARAM lanes_d)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_c)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_d)[16 *sizeof(Wave8Byte)]) |
| | BF1 + pipe4: 4-position-pipelined direct encode (#2548 deep-dive).
|
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_16x4_pipe4 (const u8(&FL_RESTRICT_PARAM lanes_a)[16], const u8(&FL_RESTRICT_PARAM lanes_b)[16], const u8(&FL_RESTRICT_PARAM lanes_c)[16], const u8(&FL_RESTRICT_PARAM lanes_d)[16], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output_a)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_b)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_c)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output_d)[16 *sizeof(Wave8Byte)]) |
| | Pipe4: transpose 16 lanes × 4 byte positions (#2548).
|
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_2 (const u8(&FL_RESTRICT_PARAM lanes)[2], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_2 (const u8(&FL_RESTRICT_PARAM lanes)[2], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_2_bf1 (const u8(&FL_RESTRICT_PARAM lanes)[2], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)]) |
| | BF1 for 2-lane Wave8 (#2548 deep-dive followup).
|
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_4 (const u8(&FL_RESTRICT_PARAM lanes)[4], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_4 (const u8(&FL_RESTRICT_PARAM lanes)[4], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_4_bf1 (const u8(&FL_RESTRICT_PARAM lanes)[4], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)]) |
| | BF1 for 4-lane Wave8 (#2548 deep-dive followup).
|
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_8 (const u8(&FL_RESTRICT_PARAM lanes)[8], const Wave8BitExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_8 (const u8(&FL_RESTRICT_PARAM lanes)[8], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION FL_IRAM void | fl::wave8Transpose_8_bf1 (const u8(&FL_RESTRICT_PARAM lanes)[8], const Wave8ByteExpansionLut &lut, u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)]) |
| | BF1 for 8-lane Wave8 (#2548 deep-dive followup).
|
| |
| FL_OPTIMIZE_FUNCTION void | fl::wave8Untranspose_16 (const u8(&FL_RESTRICT_PARAM transposed)[16 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[16 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION void | fl::wave8Untranspose_2 (const u8(&FL_RESTRICT_PARAM transposed)[2 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[2 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION void | fl::wave8Untranspose_4 (const u8(&FL_RESTRICT_PARAM transposed)[4 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[4 *sizeof(Wave8Byte)]) |
| |
| FL_OPTIMIZE_FUNCTION void | fl::wave8Untranspose_8 (const u8(&FL_RESTRICT_PARAM transposed)[8 *sizeof(Wave8Byte)], u8(&FL_RESTRICT_PARAM output)[8 *sizeof(Wave8Byte)]) |
| |