◆ Gamma8Impl() [1/2]

fl::Gamma8Impl::Gamma8Impl ( float gamma )
inline explicit
Definition at line 355 of file ease.cpp.hpp.
                                     {
        // Fills the 256-entry u16 gamma table using fixed-point math only.
        //
        // Why fixed-point: keeping `__ieee754_pow` (libm, ~2.7 KB) out of
        // release builds. The double-precision pow chain was the dominant
        // libm contributor in the post-#2908 ESP32-S3 NEOPIXEL Blink audit
        // (see #2886 / #2910).
        //
        // Why `s8x24` rather than s16x16: same 32-bit storage, but 8 integer
        // + 24 fractional bits gives 256× the sub-LSB resolution. Both
        // log2_fp/exp2_fp share the same 4-term minimax polynomial; s8x24
        // simply keeps all 24 fractional bits through every stage instead
        // of dropping back to 16 between stages, which lands the result
        // within ~1 LSB of true float pow() at the u16 output. Together with
        // the gamma=2.8 special case in Gamma8::getOrCreate(), that removes
        // the divergence from the precomputed GAMMA_2_8_LUT (see #2963 audit
        // + ucs7604 "default gamma 2.8" subcase).
        //
        // Range check: the largest-magnitude intermediate is
        // exp*log2_fp(1/255) = 16 * -7.994 ≈ -127.9, which still fits the
        // signed [-128, 128) range of s8x24. (GammaKey caps user gamma
        // at 16.)
        //
        // libm-free: log2_fp/exp2_fp are pure integer-polynomial impls. The
        // single remaining float operation is the one-shot s8x24(gamma)
        // construction (pulls __mulsf3 / __fixsfsi helpers, both << 100 B).
        const fl::s8x24 exponent(gamma);
        constexpr fl::s8x24 unit_step(1.0f / 255.0f);

        for (int idx = 1; idx != 256; ++idx) {
            // Normalised input in (0, 1], then raised to the gamma power.
            const fl::s8x24 normalized = static_cast<i32>(idx) * unit_step;
            const fl::s8x24 powered = fl::s8x24::pow(normalized, exponent);

            // powered.raw() carries FRAC_BITS=24, i.e. range [0, 2^24].
            // Map it onto u16 [0, 65535] with round-half-up. A 24-bit value
            // times a 16-bit constant overflows 32 bits, so widen to u64:
            //   level = ((u64)raw * 65535 + (1<<23)) >> 24
            const fl::u64 raw_bits =
                static_cast<fl::u64>(static_cast<fl::u32>(powered.raw()));
            const fl::u64 half_lsb = 1ull << 23;
            fl::u64 level = (raw_bits * 65535ull + half_lsb) >> 24;

            // Saturate rather than wrap if rounding pushed past full scale.
            if (level > 65535u) {
                level = 65535u;
            }
            mLut[idx] = static_cast<u16>(level);
        }

        // Entry 0 is exact for every gamma: pow(0, any) = 0. s8x24::pow
        // short-circuits an exact 0 input too, but writing the slot directly
        // skips the round-trip math, and the loop above starts at 1 so this
        // slot of `mLut` would otherwise stay uninitialized.
        // Entry 255 formerly needed a workaround for the log2(1+t) endpoint
        // residual; that snap now lives inside s8x24::pow itself (see #2969).
        mLut[0] = 0;
    }
References fl::gamma(), fl::s8x24::pow(), fl::s8x24::raw(), and fl::x.
Here is the call graph for this function: