FastLED 3.9.15
Loading...
Searching...
No Matches
polyphase.hpp
Go to the documentation of this file.
1/* ***** BEGIN LICENSE BLOCK *****
2 * Version: RCSL 1.0/RPSL 1.0
3 *
4 * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
5 *
6 * The contents of this file, and the files included with this file, are
7 * subject to the current version of the RealNetworks Public Source License
8 * Version 1.0 (the "RPSL") available at
9 * http://www.helixcommunity.org/content/rpsl unless you have licensed
10 * the file under the RealNetworks Community Source License Version 1.0
11 * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
12 * in which case the RCSL will apply. You may also obtain the license terms
13 * directly from RealNetworks. You may not use this file except in
14 * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
15 * applicable to this file, the RCSL. Please see the applicable RPSL or
16 * RCSL for the rights, obligations and limitations governing use of the
17 * contents of the file.
18 *
19 * This file is part of the Helix DNA Technology. RealNetworks is the
20 * developer of the Original Code and owns the copyrights in the portions
21 * it created.
22 *
23 * This file, and the files included with this file, is distributed and made
24 * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
25 * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
26 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
27 * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
28 *
29 * Technology Compatibility Kit Test Suite(s) Location:
30 * http://www.helixcommunity.org/content/tck
31 *
32 * Contributor(s):
33 *
34 * ***** END LICENSE BLOCK ***** */
35
36/**************************************************************************************
37 * Fixed-point MP3 decoder
38 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
39 * June 2003
40 *
41 * polyphase.c - final stage of subband transform (polyphase synthesis filter)
42 *
43 * This is the C reference version using __int64
44 * Look in the appropriate subdirectories for optimized asm implementations
45 * (e.g. arm/asmpoly.s)
46 **************************************************************************************/
47
48#include "coder.h"
49#include "fl/stl/stdint.h"
50#include "fl/stl/noexcept.h"
51#include "assembly.h"
52#include "platforms/arm/is_arm.h"
53#include "platforms/avr/is_avr.h"
54
55namespace fl {
56namespace third_party {
57
58// Compile C++ polyphase implementation unless ARM32 assembly is available
59// ARM assembly (asmpoly_gcc.S) compiles only on: __GNUC__ && FL_IS_ARM && !__thumb__ && !__thumb2__ && !FL_IS_AVR
60// This matches: ARM Cortex-A (full ARM32 instruction set)
61// Does NOT match: ARM Cortex-M (Thumb-2 only), AVR, or other non-ARM32 platforms
62#if !(defined(__GNUC__) && defined(FL_IS_ARM) && !defined(__thumb__) && !defined(__thumb2__) && !defined(FL_IS_AVR))
63#define COMPILE_CPP_POLYPHASE
64#endif
65
66/* input to Polyphase = Q(DQ_FRACBITS_OUT-2), gain 2 bits in convolution
67 * we also have the implicit bias of 2^15 to add back, so net fraction bits =
68 * DQ_FRACBITS_OUT - 2 - 2 - 15
69 * (see comment on Dequantize() for more info)
70 */
71#define DEF_NFRACBITS (DQ_FRACBITS_OUT - 2 - 2 - 15) /* net fraction bits left in the convolution output */
72#define CSHIFT 12 /* coefficients have 12 leading sign bits for early-terminating multiplies */
73
/**************************************************************************************
 * Function:    ClipToShort
 *
 * Description: shift out the fraction bits of a fixed-point value and saturate the
 *                result to the range of a signed 16-bit sample
 *
 * Inputs:      x        - fixed-point value; the caller is expected to have added the
 *                           rounding term (1 << (fracBits-1)) already
 *              fracBits - number of fraction bits to discard
 *
 * Return:      (x >> fracBits), clipped to [-32768, 32767]
 *
 * Notes:       relies on >> of a negative value being an arithmetic (sign-propagating)
 *                shift
 **************************************************************************************/
static __inline short ClipToShort(int x, int fracBits) FL_NOEXCEPT
{
	int32_t x32 = (int32_t)x;
	int32_t sign;	/* 0 for non-negative input, -1 (all ones) for negative input */

	/* assumes you've already rounded (x += (1 << (fracBits-1))) */
	x32 >>= fracBits;

	/* Ken's trick: clips to [-32768, 32767]
	 * if the value fits in 16 bits, bits 15..31 are all copies of the sign bit, so
	 * (x32 >> 15) equals (x32 >> 31); otherwise replace the value with
	 * 0x7FFF (sign == 0) or ~0x7FFF == -32768 (sign == -1)
	 */
	sign = x32 >> 31;
	if (sign != (x32 >> 15))
		x32 = sign ^ (int32_t)0x7FFF;

	return (short)x32;
}
89
/* MC0M(x): one tap pair of the mono convolution used for output sample 0.
 * Operates on the caller's locals: fetches two coefficients from coef (advancing it
 * by 2), reads vb1[x] and vb1[23-x], and feeds (vLo, c1) then (vHi, -c2) into sum1L
 * through MADD64 (presumably sum + x*y, see assembly.h).
 */
#define MC0M(x) { \
	c1 = *coef++; \
	c2 = *coef++; \
	vLo = vb1[(x)]; \
	vHi = vb1[23 - (x)]; \
	sum1L = MADD64(sum1L, vLo, c1); \
	sum1L = MADD64(sum1L, vHi, -c2); \
}
95
/* MC1M(x): one tap of the mono convolution used for output sample 16.
 * Operates on the caller's locals: fetches one coefficient from coef (advancing it
 * by 1), reads vb1[x], and feeds (vLo, c1) into sum1L through MADD64.
 */
#define MC1M(x) { \
	c1 = *coef++; \
	vLo = vb1[(x)]; \
	sum1L = MADD64(sum1L, vLo, c1); \
}
101
/* MC2M(x): one tap pair of the mono main loop, updating two accumulators at once.
 * Operates on the caller's locals: fetches two coefficients from coef (advancing it
 * by 2), reads vb1[x] and vb1[23-x], then
 *   sum1L takes (vLo, c1) and (vHi, -c2)
 *   sum2L takes (vLo, c2) and (vHi,  c1)
 * through MADD64.
 */
#define MC2M(x) { \
	c1 = *coef++; \
	c2 = *coef++; \
	vLo = vb1[(x)]; \
	vHi = vb1[23 - (x)]; \
	sum1L = MADD64(sum1L, vLo, c1); \
	sum1L = MADD64(sum1L, vHi, -c2); \
	sum2L = MADD64(sum2L, vLo, c2); \
	sum2L = MADD64(sum2L, vHi, c1); \
}
108
109#ifdef COMPILE_CPP_POLYPHASE
110
111/**************************************************************************************
112 * Function: PolyphaseMono
113 *
114 * Description: filter one subband and produce 32 output PCM samples for one channel
115 *
116 * Inputs: pointer to PCM output buffer
117 * number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
118 * pointer to start of vbuf (preserved from last call)
119 * start of filter coefficient table (in proper, shuffled order)
120 * no minimum number of guard bits is required for input vbuf
121 * (see additional scaling comments below)
122 *
123 * Outputs: 32 samples of one channel of decoded PCM data, (i.e. Q16.0)
124 *
125 * Return: none
126 *
127 * TODO: add 32-bit version for platforms where 64-bit mul-acc is not supported
128 * (note max filter gain - see polyCoef[] comments)
129 **************************************************************************************/
130void PolyphaseMono(short *pcm, int32_t *vbuf, const int32_t *coefBase) FL_NOEXCEPT
131{
132 int i;
133 const int32_t *coef;
134 int32_t *vb1;
135 int vLo, vHi, c1, c2;
136 Word64 sum1L, sum2L, rndVal;
137
138 rndVal = (Word64)( (i64)1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) );
139
140 /* special case, output sample 0 */
141 coef = coefBase;
142 vb1 = vbuf;
143 sum1L = rndVal;
144
145 MC0M(0)
146 MC0M(1)
147 MC0M(2)
148 MC0M(3)
149 MC0M(4)
150 MC0M(5)
151 MC0M(6)
152 MC0M(7)
153
154 *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
155
156 /* special case, output sample 16 */
157 coef = coefBase + 256;
158 vb1 = vbuf + 64*16;
159 sum1L = rndVal;
160
161 MC1M(0)
162 MC1M(1)
163 MC1M(2)
164 MC1M(3)
165 MC1M(4)
166 MC1M(5)
167 MC1M(6)
168 MC1M(7)
169
170 *(pcm + 16) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
171
172 /* main convolution loop: sum1L = samples 1, 2, 3, ... 15 sum2L = samples 31, 30, ... 17 */
173 coef = coefBase + 16;
174 vb1 = vbuf + 64;
175 pcm++;
176
177 /* right now, the compiler creates bad asm from this... */
178 for (i = 15; i > 0; i--) {
179 sum1L = sum2L = rndVal;
180
181 MC2M(0)
182 MC2M(1)
183 MC2M(2)
184 MC2M(3)
185 MC2M(4)
186 MC2M(5)
187 MC2M(6)
188 MC2M(7)
189
190 vb1 += 64;
191 *(pcm) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
192 *(pcm + 2*i) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS);
193 pcm++;
194 }
195}
196
/* MC0S(x): stereo form of the sample-0 tap pair.
 * Operates on the caller's locals: fetches two coefficients from coef (advancing it
 * by 2) and applies them to both channels through MADD64:
 *   left  - vb1[x],    vb1[23-x]    -> sum1L takes (vLo, c1) and (vHi, -c2)
 *   right - vb1[32+x], vb1[32+23-x] -> sum1R takes (vLo, c1) and (vHi, -c2)
 */
#define MC0S(x) { \
	c1 = *coef++; \
	c2 = *coef++; \
	vLo = vb1[(x)]; \
	vHi = vb1[23 - (x)]; \
	sum1L = MADD64(sum1L, vLo, c1); \
	sum1L = MADD64(sum1L, vHi, -c2); \
	vLo = vb1[32 + (x)]; \
	vHi = vb1[32 + (23 - (x))]; \
	sum1R = MADD64(sum1R, vLo, c1); \
	sum1R = MADD64(sum1R, vHi, -c2); \
}
204
/* MC1S(x): stereo form of the sample-16 single tap.
 * Operates on the caller's locals: fetches one coefficient from coef (advancing it
 * by 1) and feeds (vb1[x], c1) into sum1L and (vb1[32+x], c1) into sum1R through MADD64.
 */
#define MC1S(x) { \
	c1 = *coef++; \
	vLo = vb1[(x)]; \
	sum1L = MADD64(sum1L, vLo, c1); \
	vLo = vb1[32 + (x)]; \
	sum1R = MADD64(sum1R, vLo, c1); \
}
212
/* MC2S(x): stereo form of the main-loop tap pair, updating four accumulators.
 * Operates on the caller's locals: fetches two coefficients from coef (advancing it
 * by 2) and, through MADD64, for each channel:
 *   sum1 takes (vLo, c1) and (vHi, -c2)
 *   sum2 takes (vLo, c2) and (vHi,  c1)
 * Left channel reads vb1[x] / vb1[23-x]; right channel reads the same taps 32 words on.
 */
#define MC2S(x) { \
	c1 = *coef++; \
	c2 = *coef++; \
	vLo = vb1[(x)]; \
	vHi = vb1[23 - (x)]; \
	sum1L = MADD64(sum1L, vLo, c1); \
	sum1L = MADD64(sum1L, vHi, -c2); \
	sum2L = MADD64(sum2L, vLo, c2); \
	sum2L = MADD64(sum2L, vHi, c1); \
	vLo = vb1[32 + (x)]; \
	vHi = vb1[32 + (23 - (x))]; \
	sum1R = MADD64(sum1R, vLo, c1); \
	sum1R = MADD64(sum1R, vHi, -c2); \
	sum2R = MADD64(sum2R, vLo, c2); \
	sum2R = MADD64(sum2R, vHi, c1); \
}
222
223/**************************************************************************************
224 * Function: PolyphaseStereo
225 *
226 * Description: filter one subband and produce 32 output PCM samples for each channel
227 *
228 * Inputs: pointer to PCM output buffer
229 * number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
230 * pointer to start of vbuf (preserved from last call)
231 * start of filter coefficient table (in proper, shuffled order)
232 * no minimum number of guard bits is required for input vbuf
233 * (see additional scaling comments below)
234 *
235 * Outputs: 32 samples of two channels of decoded PCM data, (i.e. Q16.0)
236 *
237 * Return: none
238 *
239 * Notes: interleaves PCM samples LRLRLR...
240 *
241 * TODO: add 32-bit version for platforms where 64-bit mul-acc is not supported
242 **************************************************************************************/
243void PolyphaseStereo(short *pcm, int32_t *vbuf, const int32_t *coefBase) FL_NOEXCEPT
244{
245 int i;
246 const int32_t *coef;
247 int32_t *vb1;
248 int vLo, vHi, c1, c2;
249 Word64 sum1L, sum2L, sum1R, sum2R, rndVal;
250
251 rndVal = (Word64)( (i64)1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) );
252
253 /* special case, output sample 0 */
254 coef = coefBase;
255 vb1 = vbuf;
256 sum1L = sum1R = rndVal;
257
258 MC0S(0)
259 MC0S(1)
260 MC0S(2)
261 MC0S(3)
262 MC0S(4)
263 MC0S(5)
264 MC0S(6)
265 MC0S(7)
266
267 *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
268 *(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
269
270 /* special case, output sample 16 */
271 coef = coefBase + 256;
272 vb1 = vbuf + 64*16;
273 sum1L = sum1R = rndVal;
274
275 MC1S(0)
276 MC1S(1)
277 MC1S(2)
278 MC1S(3)
279 MC1S(4)
280 MC1S(5)
281 MC1S(6)
282 MC1S(7)
283
284 *(pcm + 2*16 + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
285 *(pcm + 2*16 + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
286
287 /* main convolution loop: sum1L = samples 1, 2, 3, ... 15 sum2L = samples 31, 30, ... 17 */
288 coef = coefBase + 16;
289 vb1 = vbuf + 64;
290 pcm += 2;
291
292 /* right now, the compiler creates bad asm from this... */
293 for (i = 15; i > 0; i--) {
294 sum1L = sum2L = rndVal;
295 sum1R = sum2R = rndVal;
296
297 MC2S(0)
298 MC2S(1)
299 MC2S(2)
300 MC2S(3)
301 MC2S(4)
302 MC2S(5)
303 MC2S(6)
304 MC2S(7)
305
306 vb1 += 64;
307 *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
308 *(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
309 *(pcm + 2*2*i + 0) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS);
310 *(pcm + 2*2*i + 1) = ClipToShort((int)SAR64(sum2R, (32-CSHIFT)), DEF_NFRACBITS);
311 pcm += 2;
312 }
313}
314
315#endif // COMPILE_CPP_POLYPHASE
316
317} // namespace third_party
318} // namespace fl
static __inline short ClipToShort(int x, int fracBits) FL_NOEXCEPT
Definition polyphase.hpp:74
fl::i64 i64
Definition coder.h:222
fl::i32 int32_t
Definition coder.h:220
long long Word64
Definition assembly.h:498
void PolyphaseMono(short *pcm, int32_t *vbuf, const int32_t *coefBase)
__inline Word64 SAR64(Word64 x, int n) FL_NOEXCEPT
Shift right.
Definition assembly.h:564
void PolyphaseStereo(short *pcm, int32_t *vbuf, const int32_t *coefBase)
constexpr enable_if< is_fixed_point< T >::value, int >::type sign(T x) FL_NOEXCEPT
Base definition for an LED controller.
Definition crgb.hpp:179
#define MC2S(x)
#define MC1M(x)
Definition polyphase.hpp:96
#define DEF_NFRACBITS
Definition polyphase.hpp:71
#define MC0S(x)
#define MC0M(x)
Definition polyphase.hpp:90
#define CSHIFT
Definition polyphase.hpp:72
#define MC1S(x)
#define MC2M(x)
#define FL_NOEXCEPT