FastLED 3.9.15
Loading...
Searching...
No Matches

◆ simd_conv_14641()

static void fl::gfx::blur_detail::simd_conv_14641 ( const u8 * p0,
const u8 * p1,
const u8 * p2,
const u8 * p3,
const u8 * p4,
u8 * out,
int nbytes )
static

Definition at line 633 of file blur.cpp.hpp.

635 {
// Five-tap 1-4-6-4-1 weighted sum over five byte streams. As written in the
// scalar tail at the bottom of this function, each output byte is
//   out[i] = (p0[i] + 4*p1[i] + 6*p2[i] + 4*p3[i] + p4[i]) >> 4
// for i in [0, nbytes). The weights add up to 16, so the shift by 4 divides
// by the weight total, and the largest possible intermediate sum is
// 255 * 16 = 4080, which fits in 16 bits without overflow.
// The shift truncates: no rounding bias is added before it.
//
// The range is processed in three passes sharing one index: 32 bytes per
// step, then 16 bytes per step, then one byte per step for the remainder.
// When nbytes <= 0 none of the loops execute and out is not written.
//
// NOTE(review): the fl::simd helpers are not visible from here. Comments on
// the two vector passes assume they compute the same per-byte result as the
// scalar tail (zero-extending widen, low/high halves recombined in order by
// narrow) - confirm against the fl::simd implementation.
// NOTE(review): whether out may alias any of p0..p4, and any alignment
// requirement on the pointers, cannot be determined from this function alone.
636 namespace fsimd = fl::simd; // ok bare using
// Kernel weights 4 and 6 as u16 vectors: the *_16 pair feeds the 32-byte
// pass, the *_8 pair feeds the 16-byte pass. Presumably set1 broadcasts the
// value to every lane - confirm.
637 const auto w4w = fsimd::set1_u16_16(4), w6w = fsimd::set1_u16_16(6);
638 const auto w4 = fsimd::set1_u16_8(4), w6 = fsimd::set1_u16_8(6);
639 int i = 0;
// Pass 1: 32 bytes per iteration, entered only while a full 32-byte chunk remains.
640 for (; i + 31 < nbytes; i += 32) {
641 auto v0 = fsimd::load_u8_32(p0+i), v1 = fsimd::load_u8_32(p1+i);
642 auto v2 = fsimd::load_u8_32(p2+i), v3 = fsimd::load_u8_32(p3+i), v4 = fsimd::load_u8_32(p4+i);
// "lo" half: pair the taps that share a weight (p0+p4 at weight 1, p1+p3 at
// weight 4) so only two multiplies are needed per half.
643 auto s04 = fsimd::add_u16_16(fsimd::widen_lo_u8x32_to_u16(v0), fsimd::widen_lo_u8x32_to_u16(v4));
644 auto s13 = fsimd::add_u16_16(fsimd::widen_lo_u8x32_to_u16(v1), fsimd::widen_lo_u8x32_to_u16(v3));
645 auto lo = fsimd::add_u16_16(s04, fsimd::add_u16_16(fsimd::mullo_u16_16(s13, w4w), fsimd::mullo_u16_16(fsimd::widen_lo_u8x32_to_u16(v2), w6w)));
646 lo = fsimd::srli_u16_16(lo, 4); // divide by the weight total (16)
// "hi" half: same computation on the other half of each 32-byte vector.
647 auto s04h = fsimd::add_u16_16(fsimd::widen_hi_u8x32_to_u16(v0), fsimd::widen_hi_u8x32_to_u16(v4));
648 auto s13h = fsimd::add_u16_16(fsimd::widen_hi_u8x32_to_u16(v1), fsimd::widen_hi_u8x32_to_u16(v3));
649 auto hi = fsimd::add_u16_16(s04h, fsimd::add_u16_16(fsimd::mullo_u16_16(s13h, w4w), fsimd::mullo_u16_16(fsimd::widen_hi_u8x32_to_u16(v2), w6w)));
650 hi = fsimd::srli_u16_16(hi, 4);
// Recombine both halves into 32 bytes and store. After the shift every lane
// is at most 255, so the narrowing cannot lose information regardless of
// whether the helper saturates or truncates.
651 fsimd::store_u8_32(out+i, fsimd::narrow_u16x16_to_u8(lo, hi));
652 }
// Pass 2: 16 bytes per iteration; same steps as pass 1 using the 8-lane u16
// helpers and the w4/w6 weight vectors.
653 for (; i + 15 < nbytes; i += 16) {
654 auto v0 = fsimd::load_u8_16(p0+i), v1 = fsimd::load_u8_16(p1+i);
655 auto v2 = fsimd::load_u8_16(p2+i), v3 = fsimd::load_u8_16(p3+i), v4 = fsimd::load_u8_16(p4+i);
656 auto s04 = fsimd::add_u16_8(fsimd::widen_lo_u8_to_u16(v0), fsimd::widen_lo_u8_to_u16(v4));
657 auto s13 = fsimd::add_u16_8(fsimd::widen_lo_u8_to_u16(v1), fsimd::widen_lo_u8_to_u16(v3));
658 auto lo = fsimd::add_u16_8(s04, fsimd::add_u16_8(fsimd::mullo_u16_8(s13, w4), fsimd::mullo_u16_8(fsimd::widen_lo_u8_to_u16(v2), w6)));
659 lo = fsimd::srli_u16_8(lo, 4);
660 auto s04h = fsimd::add_u16_8(fsimd::widen_hi_u8_to_u16(v0), fsimd::widen_hi_u8_to_u16(v4));
661 auto s13h = fsimd::add_u16_8(fsimd::widen_hi_u8_to_u16(v1), fsimd::widen_hi_u8_to_u16(v3));
662 auto hi = fsimd::add_u16_8(s04h, fsimd::add_u16_8(fsimd::mullo_u16_8(s13h, w4), fsimd::mullo_u16_8(fsimd::widen_hi_u8_to_u16(v2), w6)));
663 hi = fsimd::srli_u16_8(hi, 4);
664 fsimd::store_u8_16(out+i, fsimd::narrow_u16_to_u8(lo, hi));
665 }
// Pass 3: scalar tail for the remaining bytes (fewer than 16). This is the
// reference form of the kernel.
666 for (; i < nbytes; ++i) {
667 u16 s04 = (u16)p0[i] + (u16)p4[i]; // outer taps, weight 1 each
668 u16 s13 = (u16)p1[i] + (u16)p3[i]; // inner taps, weight 4 each
669 out[i] = (u8)((s04 + s13 * 4 + (u16)p2[i] * 6) >> 4); // center tap weight 6; >> 4 divides by 16
670 }
671}
unsigned char u8
Definition stdint.h:131

Referenced by fl::gfx::blur_detail::simd_hconv_dispatch< 2 >::apply(), and fl::gfx::blur_detail::simd_vconv_dispatch< 2 >::apply().

+ Here is the caller graph for this function: