970{
971 constexpr int shift = 2 * R;
973
974
975
976 RGB_T *bufs[5] = {nullptr, nullptr, nullptr, nullptr, nullptr};
977 for (int i = 0; i <= R; ++i)
978 bufs[i] = scratch + i * w;
979 RGB_T *zero_row = scratch + (R + 1) * w;
980
981
983
984 for (
int y = 0;
y < h; ++
y) {
985 RGB_T *out_row = pixels +
y * w;
986
987
989
990
991 const RGB_T *fwd[4] = {zero_row, zero_row, zero_row, zero_row};
992 for (int k = 0; k < R; ++k)
993 fwd[k] = (
y + 1 + k < h) ? (pixels + (
y + 1 + k) * w) : zero_row;
994
995
996
997
998 {
999 const int prefetch_y =
y + R + 2;
1000 if (prefetch_y < h) {
1001 const char *pf = (const char *)(pixels + prefetch_y * w);
1002 const int row_bytes = w * (int)sizeof(RGB_T);
1003 for (int off = 0; off < row_bytes; off += 64)
1004 __builtin_prefetch(pf + off, 0, 3);
1005 }
1006 }
1007
1008
1009
1010
1011
1012
1013 if (sizeof(typename RGB_T::fp) == 1 && !ApplyAlpha) {
1014
1015 const int nbytes = w * (int)sizeof(RGB_T);
1016 u8 *ob = (
u8 *)out_row;
1017
1019 } else {
1020
1021 for (
int x = 0;
x < w; ++
x) {
1022 acc_t r, g, b;
1023
1025
1026 if (ApplyAlpha) {
1027 out_row[
x] = P::make(
static_cast<acc_t
>(r >> shift),
1028 static_cast<acc_t>(g >> shift),
1029 static_cast<acc_t>(b >> shift), alpha);
1030 } else {
1031 out_row[
x] = P::make(
static_cast<acc_t
>(r >> shift),
1032 static_cast<acc_t>(g >> shift),
1033 static_cast<acc_t>(b >> shift));
1034 }
1035 }
1036 }
1037
1038
1039 RGB_T *recycled = bufs[0];
1040 for (int i = 0; i < R; ++i) bufs[i] = bufs[i + 1];
1041 bufs[R] = recycled;
1042 }
1043}
FASTLED_FORCE_INLINE fl::u8 P(fl::u8 x)
// NOTE(review): as it appears here, FL_BUILTIN_MEMCPY has an empty replacement
// list, so a use expands to nothing and its arguments are never evaluated.
// The name suggests a memcpy-style copy of `n` bytes from `src` to `dest` --
// TODO confirm whether the real expansion is missing from this view.
#define FL_BUILTIN_MEMCPY(dest, src, n)
// NOTE(review): likewise empty as shown. The name suggests a memset-style fill
// of `n` bytes at `dest` with `val` -- TODO confirm against the real definition.
#define FL_BUILTIN_MEMSET(dest, val, n)