FastLED 3.9.15
Loading...
Searching...
No Matches
perlin_s16x16_simd.cpp.hpp
Go to the documentation of this file.
1#pragma once
2// allow-include-after-namespace
3
4// 2D Perlin noise SIMD implementation using s16x16 fixed-point arithmetic
5// Implementation file - included from perlin_s16x16_simd.h
6
9#include "fl/math/simd.h"
11
13
14namespace fl {
15
16// Primary overload: accepts SIMD registers directly.
17// Performs SIMD floor/frac/wrap, then exits to scalar for fade/perm/grad/lerp
18// (SSE2 has no integer gather), and re-packs the result into a SIMD register.
21 const fl::i32 *fade_lut, const fl::u8 *perm)
22{
23 // SIMD: Extract integer floor (shift right by FP_BITS)
24 fl::simd::simd_u32x4 X_vec = fl::simd::srl_u32_4(nx_vec, FP_BITS);
25 fl::simd::simd_u32x4 Y_vec = fl::simd::srl_u32_4(ny_vec, FP_BITS);
26
27 // SIMD: Extract the fractional part and rescale it to HP_BITS fractional precision
28 // Convert from Q16.16 to Q8.24 by shifting left 8 bits
29 fl::simd::simd_u32x4 mask_fp = fl::simd::set1_u32_4(FP_ONE - 1);
30 fl::simd::simd_u32x4 x_frac_vec = fl::simd::and_u32_4(nx_vec, mask_fp);
31 fl::simd::simd_u32x4 y_frac_vec = fl::simd::and_u32_4(ny_vec, mask_fp);
32
33 // Shift the masked fraction left by 8 bits with sll_u32_4 (Q16.16 -> Q8.24)
34 x_frac_vec = fl::simd::sll_u32_4(x_frac_vec, 8);
35 y_frac_vec = fl::simd::sll_u32_4(y_frac_vec, 8);
36
37 // SIMD: Wrap to [0, 255]
38 fl::simd::simd_u32x4 mask_255 = fl::simd::set1_u32_4(255);
39 X_vec = fl::simd::and_u32_4(X_vec, mask_255);
40 Y_vec = fl::simd::and_u32_4(Y_vec, mask_255);
41 // ── [end SIMD coordinate arithmetic] ──────────────────────────────────────
42
43 // ── [BOUNDARY D+E: SIMD extract → per-lane scalar → SIMD re-pack] ───────
44 // Extract coordinates directly from SIMD registers (no intermediate arrays).
45 // Each lane does: fade LUT, perm table, grad, lerp — fully self-contained.
46 // SSE2 has no integer gather, so scalar random-access is unavoidable.
47 constexpr int SHIFT = HP_BITS - fl::s16x16::FRAC_BITS;
48 auto lane = [&](int i) -> fl::i32 { // Scalar noise evaluation for SIMD lane i; returns that lane's result as fl::i32.
49 fl::u32 Xi = fl::simd::extract_u32_4(X_vec, i); // integer X for this lane, already masked to [0, 255] above
50 fl::u32 Yi = fl::simd::extract_u32_4(Y_vec, i); // integer Y for this lane, already masked to [0, 255] above
51 fl::i32 xf = static_cast<fl::i32>(fl::simd::extract_u32_4(x_frac_vec, i)); // X fraction, already shifted left by 8
52 fl::i32 yf = static_cast<fl::i32>(fl::simd::extract_u32_4(y_frac_vec, i)); // Y fraction, already shifted left by 8
53
54 fl::i32 u = perlin_s16x16::fade(xf, fade_lut); // fade of the X fraction; used as t in the two X-direction lerps
55 fl::i32 v = perlin_s16x16::fade(yf, fade_lut); // fade of the Y fraction; used as t in the final lerp
56
57 int A = perm[Xi & 255] + Yi; // NOTE(review): Xi is already masked with 255, so this & 255 looks redundant
58 int AA = perm[A & 255]; // perm chain for corner (Xi, Yi)
59 int AB = perm[(A + 1) & 255]; // perm chain for corner (Xi, Yi + 1)
60 int B = perm[(Xi + 1) & 255] + Yi; // the mask wraps Xi + 1 back into the 256-entry index range
61 int BA = perm[B & 255]; // perm chain for corner (Xi + 1, Yi)
62 int BB = perm[(B + 1) & 255]; // perm chain for corner (Xi + 1, Yi + 1)
63
64 fl::i32 g_aa = perlin_s16x16::grad(perm[AA & 255], xf, yf); // NOTE(review): perm holds u8 values, so & 255 on AA/BA/AB/BB looks redundant
65 fl::i32 g_ba = perlin_s16x16::grad(perm[BA & 255], xf - HP_ONE, yf); // offset measured from the X + 1 corner
66 fl::i32 g_ab = perlin_s16x16::grad(perm[AB & 255], xf, yf - HP_ONE); // offset measured from the Y + 1 corner
67 fl::i32 g_bb = perlin_s16x16::grad(perm[BB & 255], xf - HP_ONE, yf - HP_ONE); // offset measured from the X + 1, Y + 1 corner
68 fl::i32 lerp1 = perlin_s16x16::lerp(u, g_aa, g_ba); // blend along X for the Y row
69 fl::i32 lerp2 = perlin_s16x16::lerp(u, g_ab, g_bb); // blend along X for the Y + 1 row
70 return perlin_s16x16::lerp(v, lerp1, lerp2) >> SHIFT; // blend along Y, then shift right by HP_BITS - FRAC_BITS (presumably back to s16x16 scale; confirm)
71 };
72 return fl::simd::set_u32_4(
73 static_cast<fl::u32>(lane(0)), static_cast<fl::u32>(lane(1)),
74 static_cast<fl::u32>(lane(2)), static_cast<fl::u32>(lane(3)));
75 // ── [end BOUNDARY D+E] ───────────────────────────────────────────────────
76}
77
78
79} // namespace fl
80
static constexpr int FRAC_BITS
Definition s16x16.h:22
platforms::simd_u32x4 simd_u32x4
Definition s16x16x4.h:27
unsigned char u8
Definition s16x16x4.h:132
FL_DISABLE_WARNING_PUSH unsigned char * B
Base definition for an LED controller.
Definition crgb.hpp:179
#define FL_OPTIMIZATION_LEVEL_O3_BEGIN
#define FL_OPTIMIZATION_LEVEL_O3_END
Umbrella header for SIMD subsystem.
static constexpr fl::i32 FP_ONE
static constexpr int HP_BITS
static constexpr int FP_BITS
static constexpr fl::i32 HP_ONE
static fl::simd::simd_u32x4 pnoise2d_raw_simd4_vec(fl::simd::simd_u32x4 nx_vec, fl::simd::simd_u32x4 ny_vec, const fl::i32 *fade_lut, const fl::u8 *perm)
static fl::i32 fade(fl::i32 t, const fl::i32 *table)
static fl::i32 lerp(fl::i32 t, fl::i32 a, fl::i32 b)
static fl::i32 grad(int hash, fl::i32 x, fl::i32 y)