FastLED 3.9.15
Loading...
Searching...
No Matches

◆ pnoise2d_raw_simd4_vec()

static fl::simd::simd_u32x4 fl::perlin_s16x16_simd::pnoise2d_raw_simd4_vec(
    fl::simd::simd_u32x4 nx_vec,
    fl::simd::simd_u32x4 ny_vec,
    const fl::i32 *fade_lut,
    const fl::u8 *perm)

Definition at line 19 of file perlin_s16x16_simd.cpp.hpp.

22{ // Evaluates 2D noise for four (nx, ny) lanes: SIMD coordinate split, scalar per-lane table lookups, SIMD re-pack.
23 // SIMD: Integer cell coordinate of each lane = input shifted right by FP_BITS (drops the fractional bits)
24 fl::simd::simd_u32x4 X_vec = fl::simd::srl_u32_4(nx_vec, FP_BITS);
25 fl::simd::simd_u32x4 Y_vec = fl::simd::srl_u32_4(ny_vec, FP_BITS);
26
27 // SIMD: Isolate the fractional part (bits below FP_ONE), then widen it to HP_BITS precision.
28 // The literal shift of 8 turns Q16.16 fractions into Q8.24 — assumes FP_BITS == 16 and HP_BITS == 24 (TODO confirm).
29 fl::simd::simd_u32x4 mask_fp = fl::simd::set1_u32_4(FP_ONE - 1);
30 fl::simd::simd_u32x4 x_frac_vec = fl::simd::and_u32_4(nx_vec, mask_fp);
31 fl::simd::simd_u32x4 y_frac_vec = fl::simd::and_u32_4(ny_vec, mask_fp);
32
33 // Shift every lane's fraction left by 8 bits with sll_u32_4
34 x_frac_vec = fl::simd::sll_u32_4(x_frac_vec, 8);
35 y_frac_vec = fl::simd::sll_u32_4(y_frac_vec, 8);
36
37 // SIMD: Wrap cell coordinates to [0, 255]; every perm[] index below is likewise masked with 255
38 fl::simd::simd_u32x4 mask_255 = fl::simd::set1_u32_4(255);
39 X_vec = fl::simd::and_u32_4(X_vec, mask_255);
40 Y_vec = fl::simd::and_u32_4(Y_vec, mask_255);
41 // ── [end SIMD coordinate arithmetic] ──────────────────────────────────────
42
43 // ── [BOUNDARY D+E: SIMD extract → per-lane scalar → SIMD re-pack] ───────
44 // Extract coordinates directly from SIMD registers (no intermediate arrays).
45 // Each lane does: fade LUT, perm table, grad, lerp — fully self-contained.
46 // SSE2 has no integer gather, so scalar random-access is unavoidable.
47 constexpr int SHIFT = HP_BITS - fl::s16x16::FRAC_BITS; // fractional bits dropped from the final lerp result (see return below)
48 auto lane = [&](int i) -> fl::i32 { // computes the noise value for lane i (called with 0..3 below)
49 fl::u32 Xi = fl::simd::extract_u32_4(X_vec, i);
50 fl::u32 Yi = fl::simd::extract_u32_4(Y_vec, i);
51 fl::i32 xf = static_cast<fl::i32>(fl::simd::extract_u32_4(x_frac_vec, i));
52 fl::i32 yf = static_cast<fl::i32>(fl::simd::extract_u32_4(y_frac_vec, i));
53 // u, v: interpolation weights obtained by passing the fractions through fade() with the caller's fade_lut
54 fl::i32 u = perlin_s16x16::fade(xf, fade_lut);
55 fl::i32 v = perlin_s16x16::fade(yf, fade_lut);
56 // Permutation-table hashes for the four corners of the cell
57 int A = perm[Xi & 255] + Yi; // column x = Xi (Xi was already masked above, so this & 255 is a no-op)
58 int AA = perm[A & 255]; // corner (Xi, Yi)
59 int AB = perm[(A + 1) & 255]; // corner (Xi, Yi + 1)
60 int B = perm[(Xi + 1) & 255] + Yi; // column x = Xi + 1; the mask wraps 255 + 1 back to 0
61 int BA = perm[B & 255]; // corner (Xi + 1, Yi)
62 int BB = perm[(B + 1) & 255]; // corner (Xi + 1, Yi + 1)
63 // Corner gradients: each gets the offset from its own corner; HP_ONE is presumably 1.0 in the HP_BITS format (TODO confirm)
64 fl::i32 g_aa = perlin_s16x16::grad(perm[AA & 255], xf, yf);
65 fl::i32 g_ba = perlin_s16x16::grad(perm[BA & 255], xf - HP_ONE, yf);
66 fl::i32 g_ab = perlin_s16x16::grad(perm[AB & 255], xf, yf - HP_ONE);
67 fl::i32 g_bb = perlin_s16x16::grad(perm[BB & 255], xf - HP_ONE, yf - HP_ONE);
68 fl::i32 lerp1 = perlin_s16x16::lerp(u, g_aa, g_ba); // blend the y = Yi edge along x
69 fl::i32 lerp2 = perlin_s16x16::lerp(u, g_ab, g_bb); // blend the y = Yi + 1 edge along x
70 return perlin_s16x16::lerp(v, lerp1, lerp2) >> SHIFT; // blend along y; NOTE(review): >> on a negative i32 is only guaranteed arithmetic from C++20
71 };
72 return fl::simd::set_u32_4( // lanes evaluated in order 0..3; signed results are stored as their u32 conversions
73 static_cast<fl::u32>(lane(0)), static_cast<fl::u32>(lane(1)),
74 static_cast<fl::u32>(lane(2)), static_cast<fl::u32>(lane(3)));
75 // ── [end BOUNDARY D+E] ───────────────────────────────────────────────────
76}
static constexpr int FRAC_BITS
Definition s16x16.h:22
platforms::simd_u32x4 simd_u32x4
Definition s16x16x4.h:27
FL_DISABLE_WARNING_PUSH unsigned char * B
static constexpr fl::i32 FP_ONE
static constexpr int HP_BITS
static constexpr int FP_BITS
static constexpr fl::i32 HP_ONE
static fl::i32 fade(fl::i32 t, const fl::i32 *table)
static fl::i32 lerp(fl::i32 t, fl::i32 a, fl::i32 b)
static fl::i32 grad(int hash, fl::i32 x, fl::i32 y)

References fl::B, fl::perlin_s16x16::fade(), FP_BITS, FP_ONE, fl::s16x16::FRAC_BITS, fl::perlin_s16x16::grad(), HP_BITS, HP_ONE, and fl::perlin_s16x16::lerp().

Referenced by fl::anonymous_namespace{chasing_spirals.cpp.hpp}::simd4_processChannel().

+ Here is the call graph for this function:
+ Here is the caller graph for this function: