FastLED 3.9.15
Loading...
Searching...
No Matches
imdct.hpp
Go to the documentation of this file.
1/* ***** BEGIN LICENSE BLOCK *****
2 * Version: RCSL 1.0/RPSL 1.0
3 *
4 * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
5 *
6 * The contents of this file, and the files included with this file, are
7 * subject to the current version of the RealNetworks Public Source License
8 * Version 1.0 (the "RPSL") available at
9 * http://www.helixcommunity.org/content/rpsl unless you have licensed
10 * the file under the RealNetworks Community Source License Version 1.0
11 * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
12 * in which case the RCSL will apply. You may also obtain the license terms
13 * directly from RealNetworks. You may not use this file except in
14 * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
15 * applicable to this file, the RCSL. Please see the applicable RPSL or
16 * RCSL for the rights, obligations and limitations governing use of the
17 * contents of the file.
18 *
19 * This file is part of the Helix DNA Technology. RealNetworks is the
20 * developer of the Original Code and owns the copyrights in the portions
21 * it created.
22 *
23 * This file, and the files included with this file, is distributed and made
24 * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
25 * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
26 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
27 * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
28 *
29 * Technology Compatibility Kit Test Suite(s) Location:
30 * http://www.helixcommunity.org/content/tck
31 *
32 * Contributor(s):
33 *
34 * ***** END LICENSE BLOCK ***** */
35
36/**************************************************************************************
37 * Fixed-point MP3 decoder
38 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
39 * June 2003
40 *
41 * imdct.c - antialias, inverse transform (short/long/mixed), windowing,
42 * overlap-add, frequency inversion
43 **************************************************************************************/
44
45#include "coder.h"
46#include "assembly.h"
47#include "fl/stl/stdint.h"
48#include "fl/stl/noexcept.h"
49
50namespace fl {
51namespace third_party {
52
53
54
55/**************************************************************************************
56 * Function: AntiAlias
57 *
58 * Description: smooth transition across DCT block boundaries (every 18 coefficients)
59 *
60 * Inputs: vector of dequantized coefficients, length = (nBfly+1) * 18
61 * number of "butterflies" to perform (one butterfly means one
62 * inter-block smoothing operation)
63 *
64 * Outputs: updated coefficient vector x
65 *
66 * Return: none
67 *
68 * Notes: weighted average of opposite bands (pairwise) from the 8 samples
69 * before and after each block boundary
70 * nBlocks = (nonZeroBound + 7) / 18, since nZB is the first ZERO sample
71 * above which all other samples are also zero
72 * max gain per sample = 1.372
73 * MAX(i) (abs(csa[i][0]) + abs(csa[i][1]))
74 * bits gained = 0
75 * assume at least 1 guard bit in x[] to avoid overflow
76 * (should be guaranteed from dequant, and max gain from stproc * max
77 * gain from AntiAlias < 2.0)
78 **************************************************************************************/
79// a little bit faster in RAM (< 1 ms per block)
81{
82 int32_t k;
83 int32_t a0, b0, c0, c1;
84 const int32_t *c;
85
86 /* csa = Q31 */
87 for (k = nBfly; k > 0; k--) {
88 c = csa[0];
89 x += 18;
90
91 a0 = x[-1]; c0 = *c; c++; b0 = x[0]; c1 = *c; c++;
92 x[-1] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) * 2L;
93 x[0] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) * 2L;
94
95 a0 = x[-2]; c0 = *c; c++; b0 = x[1]; c1 = *c; c++;
96 x[-2] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) * 2L;
97 x[1] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) * 2L;
98
99 a0 = x[-3]; c0 = *c; c++; b0 = x[2]; c1 = *c; c++;
100 x[-3] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) * 2L;
101 x[2] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) * 2L;
102
103 a0 = x[-4]; c0 = *c; c++; b0 = x[3]; c1 = *c; c++;
104 x[-4] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) * 2L;
105 x[3] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) * 2L;
106
107 a0 = x[-5]; c0 = *c; c++; b0 = x[4]; c1 = *c; c++;
108 x[-5] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) * 2L;
109 x[4] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) * 2L;
110
111 a0 = x[-6]; c0 = *c; c++; b0 = x[5]; c1 = *c; c++;
112 x[-6] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) * 2L;
113 x[5] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) * 2L;
114
115 a0 = x[-7]; c0 = *c; c++; b0 = x[6]; c1 = *c; c++;
116 x[-7] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) * 2L;
117 x[6] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) * 2L;
118
119 a0 = x[-8]; c0 = *c; c++; b0 = x[7]; c1 = *c; c++;
120 x[-8] = (MULSHIFT32(c0, a0) - MULSHIFT32(c1, b0)) * 2L;
121 x[7] = (MULSHIFT32(c0, b0) + MULSHIFT32(c1, a0)) * 2L;
122 }
123}
124
125/**************************************************************************************
126 * Function: WinPrevious
127 *
128 * Description: apply specified window to second half of previous IMDCT (overlap part)
129 *
130 * Inputs: vector of 9 coefficients (xPrev)
131 * window type (0, 1, 2, 3)
132 *
133 * Outputs: 18 windowed output coefficients (gain 1 integer bit)
134 *
135 * Return: none
136 *
137 * Notes: produces 18 output samples from 9 input samples via symmetry
138 * all blocks gain at least 1 guard bit via window (long blocks get extra
139 * sign bit, short blocks can have one addition but max gain < 1.0)
140 **************************************************************************************/
141static void WinPrevious(int32_t *xPrev, int32_t *xPrevWin, int32_t btPrev) FL_NOEXCEPT
142{
143 int32_t i;
144 int32_t x, *xp, *xpwLo, *xpwHi, wLo, wHi;
145 const int32_t *wpLo, *wpHi;
146
147 xp = xPrev;
148 /* mapping (see IMDCT12x3): xPrev[0-2] = sum[6-8], xPrev[3-8] = sum[12-17] */
149 if (btPrev == 2) {
150 /* this could be reordered for minimum loads/stores */
151 wpLo = imdctWin[btPrev];
152 xPrevWin[ 0] = MULSHIFT32(wpLo[ 6], xPrev[2]) + MULSHIFT32(wpLo[0], xPrev[6]);
153 xPrevWin[ 1] = MULSHIFT32(wpLo[ 7], xPrev[1]) + MULSHIFT32(wpLo[1], xPrev[7]);
154 xPrevWin[ 2] = MULSHIFT32(wpLo[ 8], xPrev[0]) + MULSHIFT32(wpLo[2], xPrev[8]);
155 xPrevWin[ 3] = MULSHIFT32(wpLo[ 9], xPrev[0]) + MULSHIFT32(wpLo[3], xPrev[8]);
156 xPrevWin[ 4] = MULSHIFT32(wpLo[10], xPrev[1]) + MULSHIFT32(wpLo[4], xPrev[7]);
157 xPrevWin[ 5] = MULSHIFT32(wpLo[11], xPrev[2]) + MULSHIFT32(wpLo[5], xPrev[6]);
158 xPrevWin[ 6] = MULSHIFT32(wpLo[ 6], xPrev[5]);
159 xPrevWin[ 7] = MULSHIFT32(wpLo[ 7], xPrev[4]);
160 xPrevWin[ 8] = MULSHIFT32(wpLo[ 8], xPrev[3]);
161 xPrevWin[ 9] = MULSHIFT32(wpLo[ 9], xPrev[3]);
162 xPrevWin[10] = MULSHIFT32(wpLo[10], xPrev[4]);
163 xPrevWin[11] = MULSHIFT32(wpLo[11], xPrev[5]);
164 xPrevWin[12] = xPrevWin[13] = xPrevWin[14] = xPrevWin[15] = xPrevWin[16] = xPrevWin[17] = 0;
165 } else {
166 /* use ARM-style pointers (*ptr++) so that ADS compiles well */
167 wpLo = imdctWin[btPrev] + 18;
168 wpHi = wpLo + 17;
169 xpwLo = xPrevWin;
170 xpwHi = xPrevWin + 17;
171 for (i = 9; i > 0; i--) {
172 x = *xp++; wLo = *wpLo++; wHi = *wpHi--;
173 *xpwLo++ = MULSHIFT32(wLo, x);
174 *xpwHi-- = MULSHIFT32(wHi, x);
175 }
176 }
177}
178
179/**************************************************************************************
180 * Function: FreqInvertRescale
181 *
182 * Description: do frequency inversion (odd samples of odd blocks) and rescale
183 * if necessary (extra guard bits added before IMDCT)
184 *
185 * Inputs: output vector y (18 new samples, spaced NBANDS apart)
186 * previous sample vector xPrev (9 samples)
187 * index of current block
188 * number of extra shifts added before IMDCT (usually 0)
189 *
190 * Outputs: inverted and rescaled (as necessary) outputs
191 * rescaled (as necessary) previous samples
192 *
193 * Return: updated mOut (from new outputs y)
194 **************************************************************************************/
196{
197 int32_t i;
198 int32_t d, mOut;
199 int32_t y0, y1, y2, y3, y4, y5, y6, y7, y8;
200
201 if (es == 0) {
202 /* fast case - frequency invert only (no rescaling) - can fuse into overlap-add for speed, if desired */
203 if (blockIdx & 0x01) {
204 y += NBANDS;
205 y0 = *y; y += 2*NBANDS;
206 y1 = *y; y += 2*NBANDS;
207 y2 = *y; y += 2*NBANDS;
208 y3 = *y; y += 2*NBANDS;
209 y4 = *y; y += 2*NBANDS;
210 y5 = *y; y += 2*NBANDS;
211 y6 = *y; y += 2*NBANDS;
212 y7 = *y; y += 2*NBANDS;
213 y8 = *y; y += 2*NBANDS;
214
215 y -= 18*NBANDS;
216 *y = -y0; y += 2*NBANDS;
217 *y = -y1; y += 2*NBANDS;
218 *y = -y2; y += 2*NBANDS;
219 *y = -y3; y += 2*NBANDS;
220 *y = -y4; y += 2*NBANDS;
221 *y = -y5; y += 2*NBANDS;
222 *y = -y6; y += 2*NBANDS;
223 *y = -y7; y += 2*NBANDS;
224 *y = -y8; y += 2*NBANDS;
225 }
226 return 0;
227 } else {
228 /* undo pre-IMDCT scaling, clipping if necessary */
229 mOut = 0;
230 if (blockIdx & 0x01) {
231 /* frequency invert */
232 for (i = 0; i < 18; i+=2) {
233 d = *y; CLIP_2N(d, 31 - es); *y = static_cast<int>(static_cast<unsigned int>(d) << es); mOut |= FASTABS(*y); y += NBANDS;
234 d = -*y; CLIP_2N(d, 31 - es); *y = static_cast<int>(static_cast<unsigned int>(d) << es); mOut |= FASTABS(*y); y += NBANDS;
235 d = *xPrev; CLIP_2N(d, 31 - es); *xPrev++ = static_cast<int>(static_cast<unsigned int>(d) << es);
236 }
237 } else {
238 for (i = 0; i < 18; i+=2) {
239 d = *y; CLIP_2N(d, 31 - es); *y = static_cast<int>(static_cast<unsigned int>(d) << es); mOut |= FASTABS(*y); y += NBANDS;
240 d = *y; CLIP_2N(d, 31 - es); *y = static_cast<int>(static_cast<unsigned int>(d) << es); mOut |= FASTABS(*y); y += NBANDS;
241 d = *xPrev; CLIP_2N(d, 31 - es); *xPrev++ = static_cast<int>(static_cast<unsigned int>(d) << es);
242 }
243 }
244 return mOut;
245 }
246}
247
248/* format = Q31
249 * #define M_PI 3.14159265358979323846
250 * double u = 2.0 * M_PI / 9.0;
251 * float c0 = sqrt(3.0) / 2.0;
252 * float c1 = cos(u);
253 * float c2 = cos(2*u);
254 * float c3 = sin(u);
255 * float c4 = sin(2*u);
256 */
257
/* Q31 constants for idct9, with u = 2*pi/9 (every value fits in a positive int32_t) */
static const int32_t c9_0 = 0x6ed9eba1;     /* sqrt(3)/2 */
static const int32_t c9_1 = 0x620dbe8b;     /* cos(u)    */
static const int32_t c9_2 = 0x163a1a7e;     /* cos(2*u)  */
static const int32_t c9_3 = 0x5246dd49;     /* sin(u)    */
static const int32_t c9_4 = 0x7e0e2e32;     /* sin(2*u)  */
263
264/* format = Q31
265 * cos(((0:8) + 0.5) * (pi/18))
266 */
/* Q31 table used by the last stage of IMDCT36, one entry per even/odd output pair */
static const int32_t c18[9] = {
    0x7f834ed0,
    0x7ba3751d,
    0x7401e4c1,
    0x68d9f964,
    0x5a82799a,
    0x496af3e2,
    0x36185aee,
    0x2120fb83,
    0x0b27eb5c,
};
270
271/* require at least 3 guard bits in x[] to ensure no overflow */
272static __inline void idct9(int32_t *x) FL_NOEXCEPT
273{
274 int32_t a1, a2, a3, a4, a5, a6, a7, a8, a9;
275 int32_t a10, a11, a12, a13, a14, a15, a16, a17, a18;
276 int32_t a19, a20, a21, a22, a23, a24, a25, a26, a27;
277 int32_t m1, m3, m5, m6, m7, m8, m9, m10, m11, m12;
278 int32_t x0, x1, x2, x3, x4, x5, x6, x7, x8;
279
280 x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x4 = x[4];
281 x5 = x[5]; x6 = x[6]; x7 = x[7]; x8 = x[8];
282
283 a1 = x0 - x6;
284 a2 = x1 - x5;
285 a3 = x1 + x5;
286 a4 = x2 - x4;
287 a5 = x2 + x4;
288 a6 = x2 + x8;
289 a7 = x1 + x7;
290
291 a8 = a6 - a5; /* ie x[8] - x[4] */
292 a9 = a3 - a7; /* ie x[5] - x[7] */
293 a10 = a2 - x7; /* ie x[1] - x[5] - x[7] */
294 a11 = a4 - x8; /* ie x[2] - x[4] - x[8] */
295
296 /* do the << 1 as constant shifts where mX is actually used (free, no stall or extra inst.) */
297 m1 = MULSHIFT32(c9_0, x3);
298 m3 = MULSHIFT32(c9_0, a10);
299 m5 = MULSHIFT32(c9_1, a5);
300 m6 = MULSHIFT32(c9_2, a6);
301 m7 = MULSHIFT32(c9_1, a8);
302 m8 = MULSHIFT32(c9_2, a5);
303 m9 = MULSHIFT32(c9_3, a9);
304 m10 = MULSHIFT32(c9_4, a7);
305 m11 = MULSHIFT32(c9_3, a3);
306 m12 = MULSHIFT32(c9_4, a9);
307
308 a12 = x[0] + (x[6] >> 1);
309 a13 = a12 + ( m1 * 2L);
310 a14 = a12 - ( m1 * 2L);
311 a15 = a1 + ( a11 >> 1);
312 a16 = ( m5 * 2L) + (m6 * 2L);
313 a17 = ( m7 * 2L) - (m8 * 2L);
314 a18 = a16 + a17;
315 a19 = ( m9 * 2L) + (m10 * 2L);
316 a20 = (m11 * 2L) - (m12 * 2L);
317
318 a21 = a20 - a19;
319 a22 = a13 + a16;
320 a23 = a14 + a16;
321 a24 = a14 + a17;
322 a25 = a13 + a17;
323 a26 = a14 - a18;
324 a27 = a13 - a18;
325
326 x0 = a22 + a19; x[0] = x0;
327 x1 = a15 + (m3 * 2L); x[1] = x1;
328 x2 = a24 + a20; x[2] = x2;
329 x3 = a26 - a21; x[3] = x3;
330 x4 = a1 - a11; x[4] = x4;
331 x5 = a27 + a21; x[5] = x5;
332 x6 = a25 - a20; x[6] = x6;
333 x7 = a15 - (m3 * 2L); x[7] = x7;
334 x8 = a23 - a19; x[8] = x8;
335}
336
337/* let c(j) = cos(M_PI/36 * ((j)+0.5)), s(j) = sin(M_PI/36 * ((j)+0.5))
338 * then fastWin[2*j+0] = c(j)*(s(j) + c(j)), j = [0, 8]
339 * fastWin[2*j+1] = c(j)*(s(j) - c(j))
340 * format = Q30
341 */
343 static_cast<int32_t>(0x42aace8bU), static_cast<int32_t>(0xc2e92724U), static_cast<int32_t>(0x47311c28U), static_cast<int32_t>(0xc95f619aU), static_cast<int32_t>(0x4a868febU), static_cast<int32_t>(0xd0859d8cU),
344 static_cast<int32_t>(0x4c913b51U), static_cast<int32_t>(0xd8243ea0U), static_cast<int32_t>(0x4d413cccU), static_cast<int32_t>(0xe0000000U), static_cast<int32_t>(0x4c913b51U), static_cast<int32_t>(0xe7dbc161U),
345 static_cast<int32_t>(0x4a868febU), static_cast<int32_t>(0xef7a6275U), static_cast<int32_t>(0x47311c28U), static_cast<int32_t>(0xf6a09e67U), static_cast<int32_t>(0x42aace8bU), static_cast<int32_t>(0xfd16d8ddU),
346};
347
348/**************************************************************************************
349 * Function: IMDCT36
350 *
351 * Description: 36-point modified DCT, with windowing and overlap-add (50% overlap)
352 *
353 * Inputs: vector of 18 coefficients (N/2 inputs produces N outputs, by symmetry)
354 * overlap part of last IMDCT (9 samples - see output comments)
355 * window type (0,1,2,3) of current and previous block
356 * current block index (for deciding whether to do frequency inversion)
357 * number of guard bits in input vector
358 *
359 * Outputs: 18 output samples, after windowing and overlap-add with last frame
360 * second half of (unwindowed) 36-point IMDCT - save for next time
361 * only save 9 xPrev samples, using symmetry (see WinPrevious())
362 *
363 * Notes: this is Ken's hyper-fast algorithm, including symmetric sin window
364 * optimization, if applicable
365 * total number of multiplies, general case:
366 * 2*10 (idct9) + 9 (last stage imdct) + 36 (for windowing) = 65
367 * total number of multiplies, btCurr == 0 && btPrev == 0:
368 * 2*10 (idct9) + 9 (last stage imdct) + 18 (for windowing) = 47
369 *
370 * blockType == 0 is by far the most common case, so it should be
371 * possible to use the fast path most of the time
372 * this is the fastest known algorithm for performing
373 * long IMDCT + windowing + overlap-add in MP3
374 *
375 * Return: mOut (OR of abs(y) for all y calculated here)
376 *
377 * TODO: optimize for ARM (reorder window coefs, ARM-style pointers in C,
378 * inline asm may or may not be helpful)
379 **************************************************************************************/
380// barely faster in RAM
381static int32_t IMDCT36(int32_t *xCurr, int32_t *xPrev, int32_t *y, int32_t btCurr, int32_t btPrev, int32_t blockIdx, int32_t gb) FL_NOEXCEPT
382{
383 int32_t i, es;
384 int32_t xBuf[18], xPrevWin[18];
385 int32_t acc1, acc2, s, d, t, mOut;
386 int32_t xo, xe, c, *xp, yLo, yHi;
387 const int32_t *cp, *wp;
388
389 acc1 = acc2 = 0;
390 xCurr += 17;
391
392 /* 7 gb is always adequate for antialias + accumulator loop + idct9 */
393 if (gb < 7) {
394 /* rarely triggered - 5% to 10% of the time on normal clips (with Q25 input) */
395 es = 7 - gb;
396 for (i = 8; i >= 0; i--) {
397 acc1 = ((*xCurr--) >> es) - acc1;
398 acc2 = acc1 - acc2;
399 acc1 = ((*xCurr--) >> es) - acc1;
400 xBuf[i+9] = acc2; /* odd */
401 xBuf[i+0] = acc1; /* even */
402 xPrev[i] >>= es;
403 }
404 } else {
405 es = 0;
406 /* max gain = 18, assume adequate guard bits */
407 for (i = 8; i >= 0; i--) {
408 acc1 = (*xCurr--) - acc1;
409 acc2 = acc1 - acc2;
410 acc1 = (*xCurr--) - acc1;
411 xBuf[i+9] = acc2; /* odd */
412 xBuf[i+0] = acc1; /* even */
413 }
414 }
415 /* xEven[0] and xOdd[0] scaled by 0.5 */
416 xBuf[9] >>= 1;
417 xBuf[0] >>= 1;
418
419 /* do 9-point IDCT on even and odd */
420 idct9(xBuf+0); /* even */
421 idct9(xBuf+9); /* odd */
422
423 xp = xBuf + 8;
424 cp = c18 + 8;
425 mOut = 0;
426 if (btPrev == 0 && btCurr == 0) {
427 /* fast path - use symmetry of sin window to reduce windowing multiplies to 18 (N/2) */
428 wp = fastWin36;
429 for (i = 0; i < 9; i++) {
430 /* do ARM-style pointer arithmetic (i still needed for y[] indexing - compiler spills if 2 y pointers) */
431 c = *cp--; xo = *(xp + 9); xe = *xp--;
432 /* gain 2 int bits here */
433 xo = MULSHIFT32(c, xo); /* 2*c18*xOdd (mul by 2 implicit in scaling) */
434 xe >>= 2;
435
436 s = -(*xPrev); /* sum from last block (always at least 2 guard bits) */
437 d = -(xe - xo); /* gain 2 int bits, don't shift xo (effective << 1 to eat sign bit, << 1 for mul by 2) */
438 (*xPrev++) = xe + xo; /* symmetry - xPrev[i] = xPrev[17-i] for long blocks */
439 t = s - d;
440
441 yLo = (d + (MULSHIFT32(t, *wp++) * 4L));
442 yHi = (s + (MULSHIFT32(t, *wp++) * 4L));
443 y[(i)*NBANDS] = yLo;
444 y[(17-i)*NBANDS] = yHi;
445 mOut |= FASTABS(yLo);
446 mOut |= FASTABS(yHi);
447 }
448 } else {
449 /* slower method - either prev or curr is using window type != 0 so do full 36-point window
450 * output xPrevWin has at least 3 guard bits (xPrev has 2, gain 1 in WinPrevious)
451 */
452 WinPrevious(xPrev, xPrevWin, btPrev);
453
454 wp = imdctWin[btCurr];
455 for (i = 0; i < 9; i++) {
456 c = *cp--; xo = *(xp + 9); xe = *xp--;
457 /* gain 2 int bits here */
458 xo = MULSHIFT32(c, xo); /* 2*c18*xOdd (mul by 2 implicit in scaling) */
459 xe >>= 2;
460
461 d = xe - xo;
462 (*xPrev++) = xe + xo; /* symmetry - xPrev[i] = xPrev[17-i] for long blocks */
463
464 yLo = (xPrevWin[i] + MULSHIFT32(d, wp[i])) * 4L;
465 yHi = (xPrevWin[17-i] + MULSHIFT32(d, wp[17-i])) * 4L;
466 y[(i)*NBANDS] = yLo;
467 y[(17-i)*NBANDS] = yHi;
468 mOut |= FASTABS(yLo);
469 mOut |= FASTABS(yHi);
470 }
471 }
472
473 xPrev -= 9;
474 mOut |= FreqInvertRescale(y, xPrev, blockIdx, es);
475
476 return mOut;
477}
478
/* Q31 constants for imdct12. They are only ever read, so they are declared const
 * like the c9_x / c18 tables instead of being left as writable data. */
static const int32_t c3_0 = static_cast<int32_t>(0x6ed9eba1U);     /* format = Q31, cos(pi/6) */
static const int32_t c6[3] = {                                     /* format = Q31, cos(((0:2) + 0.5) * (pi/6)) */
    static_cast<int32_t>(0x7ba3751dU),
    static_cast<int32_t>(0x5a82799aU),
    0x2120fb83,
};
481
482/* 12-point inverse DCT, used in IMDCT12x3()
483 * 4 input guard bits will ensure no overflow
484 */
485static __inline void imdct12 (int32_t *x, int32_t *out) FL_NOEXCEPT
486{
487 int32_t a0, a1, a2;
488 int32_t x0, x1, x2, x3, x4, x5;
489
490 x0 = *x; x+=3; x1 = *x; x+=3;
491 x2 = *x; x+=3; x3 = *x; x+=3;
492 x4 = *x; x+=3; x5 = *x; x+=3;
493
494 x4 -= x5;
495 x3 -= x4;
496 x2 -= x3;
497 x3 -= x5;
498 x1 -= x2;
499 x0 -= x1;
500 x1 -= x3;
501
502 x0 >>= 1;
503 x1 >>= 1;
504
505 a0 = MULSHIFT32(c3_0, x2) * 2L;
506 a1 = x0 + (x4 >> 1);
507 a2 = x0 - x4;
508 x0 = a1 + a0;
509 x2 = a2;
510 x4 = a1 - a0;
511
512 a0 = MULSHIFT32(c3_0, x3) * 2L;
513 a1 = x1 + (x5 >> 1);
514 a2 = x1 - x5;
515
516 /* cos window odd samples, mul by 2, eat sign bit */
517 x1 = MULSHIFT32(c6[0], a1 + a0) * 4L;
518 x3 = MULSHIFT32(c6[1], a2) * 4L;
519 x5 = MULSHIFT32(c6[2], a1 - a0) * 4L;
520
521 *out = x0 + x1; out++;
522 *out = x2 + x3; out++;
523 *out = x4 + x5; out++;
524 *out = x4 - x5; out++;
525 *out = x2 - x3; out++;
526 *out = x0 - x1;
527}
528
529/**************************************************************************************
530 * Function: IMDCT12x3
531 *
532 * Description: three 12-point modified DCT's for short blocks, with windowing,
533 * short block concatenation, and overlap-add
534 *
535 * Inputs: 3 interleaved vectors of 6 samples each
536 * (block0[0], block1[0], block2[0], block0[1], block1[1]....)
537 * overlap part of last IMDCT (9 samples - see output comments)
538 * window type (0,1,2,3) of previous block
539 * current block index (for deciding whether to do frequency inversion)
540 * number of guard bits in input vector
541 *
542 * Outputs: updated sample vector x, net gain of 1 integer bit
543 * second half of (unwindowed) IMDCT's - save for next time
544 * only save 9 xPrev samples, using symmetry (see WinPrevious())
545 *
546 * Return: mOut (OR of abs(y) for all y calculated here)
547 *
548 * TODO: optimize for ARM
549 **************************************************************************************/
550 // barely faster in RAM
551static int32_t IMDCT12x3(int32_t *xCurr, int32_t *xPrev, int32_t *y, int32_t btPrev, int32_t blockIdx, int32_t gb) FL_NOEXCEPT
552{
553 int32_t i, es, mOut;
554 int32_t yLo, xBuf[18], xPrevWin[18]; /* need temp buffer for reordering short blocks */
555 const int32_t *wp;
556
557 es = 0;
558 /* 7 gb is always adequate for accumulator loop + idct12 + window + overlap */
559 if (gb < 7) {
560 es = 7 - gb;
561 for (i = 0; i < 18; i+=2) {
562 xCurr[i+0] >>= es;
563 xCurr[i+1] >>= es;
564 *xPrev++ >>= es;
565 }
566 xPrev -= 9;
567 }
568
569 /* requires 4 input guard bits for each imdct12 */
570 imdct12(xCurr + 0, xBuf + 0);
571 imdct12(xCurr + 1, xBuf + 6);
572 imdct12(xCurr + 2, xBuf + 12);
573
574 /* window previous from last time */
575 WinPrevious(xPrev, xPrevWin, btPrev);
576
577 /* could unroll this for speed, minimum loads (short blocks usually rare, so doesn't make much overall difference)
578 * xPrevWin[i] << 2 still has 1 gb always, max gain of windowed xBuf stuff also < 1.0 and gain the sign bit
579 * so y calculations won't overflow
580 */
581 wp = imdctWin[2];
582 mOut = 0;
583 for (i = 0; i < 3; i++) {
584 yLo = (xPrevWin[ 0+i] * 4L);
585 mOut |= FASTABS(yLo); y[( 0+i)*NBANDS] = yLo;
586 yLo = (xPrevWin[ 3+i] * 4L);
587 mOut |= FASTABS(yLo); y[( 3+i)*NBANDS] = yLo;
588 yLo = (xPrevWin[ 6+i] * 4L) + (MULSHIFT32(wp[0+i], xBuf[3+i]));
589 mOut |= FASTABS(yLo); y[( 6+i)*NBANDS] = yLo;
590 yLo = (xPrevWin[ 9+i] * 4L) + (MULSHIFT32(wp[3+i], xBuf[5-i]));
591 mOut |= FASTABS(yLo); y[( 9+i)*NBANDS] = yLo;
592 yLo = (xPrevWin[12+i] * 4L) + (MULSHIFT32(wp[6+i], xBuf[2-i]) + MULSHIFT32(wp[0+i], xBuf[(6+3)+i]));
593 mOut |= FASTABS(yLo); y[(12+i)*NBANDS] = yLo;
594 yLo = (xPrevWin[15+i] * 4L) + (MULSHIFT32(wp[9+i], xBuf[0+i]) + MULSHIFT32(wp[3+i], xBuf[(6+5)-i]));
595 mOut |= FASTABS(yLo); y[(15+i)*NBANDS] = yLo;
596 }
597
598 /* save previous (unwindowed) for overlap - only need samples 6-8, 12-17 */
599 for (i = 6; i < 9; i++)
600 *xPrev++ = xBuf[i] >> 2;
601 for (i = 12; i < 18; i++)
602 *xPrev++ = xBuf[i] >> 2;
603
604 xPrev -= 9;
605 mOut |= FreqInvertRescale(y, xPrev, blockIdx, es);
606
607 return mOut;
608}
609
610/**************************************************************************************
611 * Function: HybridTransform
612 *
613 * Description: IMDCT's, windowing, and overlap-add on long/short/mixed blocks
614 *
615 * Inputs: vector of input coefficients, length = nBlocksTotal * 18
616 * vector of overlap samples from last time, length = nBlocksPrev * 9
617 * buffer for output samples, length = MAXNSAMP
618 * SideInfoSub struct for this granule/channel
619 * BlockCount struct with necessary info
620 * number of non-zero input and overlap blocks
621 * number of long blocks in input vector (rest assumed to be short blocks)
622 * number of blocks which use long window (type) 0 in case of mixed block
623 * (bc->currWinSwitch, 0 for non-mixed blocks)
624 *
625 * Outputs: transformed, windowed, and overlapped sample buffer
626 * does frequency inversion on odd blocks
627 * updated buffer of samples for overlap
628 *
629 * Return: number of non-zero IMDCT blocks calculated in this call
630 * (including overlap-add)
631 *
632 * TODO: examine mixedBlock/winSwitch logic carefully (test he_mode.bit)
633 **************************************************************************************/
635{
636 int32_t xPrevWin[18];
637 int32_t currWinIdx, prevWinIdx;
638 int32_t i, j, nBlocksOut, nonZero, mOut;
639 int32_t xp;
640
641 ASSERT(bc->nBlocksLong <= NBANDS);
642 ASSERT(bc->nBlocksTotal <= NBANDS);
643 ASSERT(bc->nBlocksPrev <= NBANDS);
644
645 mOut = 0;
646
647 /* do long blocks, if any */
648 for(i = 0; i < bc->nBlocksLong; i++) {
649 /* currWinIdx picks the right window for long blocks (if mixed, long blocks use window type 0) */
650 currWinIdx = sis->blockType;
651 if (sis->mixedBlock && i < bc->currWinSwitch)
652 currWinIdx = 0;
653
654 prevWinIdx = bc->prevType;
655 if (i < bc->prevWinSwitch)
656 prevWinIdx = 0;
657
658 /* do 36-point IMDCT, including windowing and overlap-add */
659 mOut |= IMDCT36(xCurr, xPrev, &(y[0][i]), currWinIdx, prevWinIdx, i, bc->gbIn);
660 xCurr += 18;
661 xPrev += 9;
662 }
663
664 /* do short blocks (if any) */
665 for ( ; i < bc->nBlocksTotal; i++) {
666 ASSERT(sis->blockType == 2);
667
668 prevWinIdx = bc->prevType;
669 if (i < bc->prevWinSwitch)
670 prevWinIdx = 0;
671
672 mOut |= IMDCT12x3(xCurr, xPrev, &(y[0][i]), prevWinIdx, i, bc->gbIn);
673 xCurr += 18;
674 xPrev += 9;
675 }
676 nBlocksOut = i;
677
678 /* window and overlap prev if prev longer that current */
679 for ( ; i < bc->nBlocksPrev; i++) {
680 prevWinIdx = bc->prevType;
681 if (i < bc->prevWinSwitch)
682 prevWinIdx = 0;
683 WinPrevious(xPrev, xPrevWin, prevWinIdx);
684
685 nonZero = 0;
686 /* sign_bit = -1 for odd i, 0 for even i */
687 int32_t sign_bit = ((i & 1) ? (int32_t)(-1) : 0);
688 for (j = 0; j < 9; j++) {
689 xp = xPrevWin[2*j+0] * 4L; /* * 4 temp for scaling */
690 nonZero |= xp;
691 y[2*j+0][i] = xp;
692 mOut |= FASTABS(xp);
693
694 /* frequency inversion on odd blocks/odd samples (flip sign if i odd, j odd) */
695 xp = xPrevWin[2*j+1] * 4L;
696 xp = (xp ^ sign_bit) + (i & 0x01);
697 nonZero |= xp;
698 y[2*j+1][i] = xp;
699 mOut |= FASTABS(xp);
700
701 xPrev[j] = 0;
702 }
703 xPrev += 9;
704 if (nonZero)
705 nBlocksOut = i;
706 }
707
708 /* clear rest of blocks */
709 for ( ; i < 32; i++) {
710 for (j = 0; j < 18; j++)
711 y[j][i] = 0;
712 }
713
714 bc->gbOut = CLZ(mOut) - 1;
715
716 return nBlocksOut;
717}
718
719/**************************************************************************************
720 * Function: IMDCT
721 *
722 * Description: do alias reduction, inverse MDCT, overlap-add, and frequency inversion
723 *
724 * Inputs: MP3DecInfo structure filled by UnpackFrameHeader(), UnpackSideInfo(),
725 * UnpackScaleFactors(), and DecodeHuffman() (for this granule, channel)
726 * includes PCM samples in overBuf (from last call to IMDCT) for OLA
727 * index of current granule and channel
728 *
729 * Outputs: PCM samples in outBuf, for input to subband transform
730 * PCM samples in overBuf, for OLA next time
731 * updated hi->nonZeroBound index for this channel
732 *
733 * Return: 0 on success, -1 if null input pointers
734 **************************************************************************************/
735 // a bit faster in RAM
737{
738 int32_t nBfly, blockCutoff;
739 FrameHeader *fh;
740 SideInfo *si;
741 HuffmanInfo *hi;
742 IMDCTInfo *mi;
743 BlockCount bc;
744
745 /* validate pointers */
746 if (!mp3DecInfo || !mp3DecInfo->FrameHeaderPS || !mp3DecInfo->SideInfoPS ||
747 !mp3DecInfo->HuffmanInfoPS || !mp3DecInfo->IMDCTInfoPS)
748 return -1;
749
750 /* si is an array of up to 4 structs, stored as gr0ch0, gr0ch1, gr1ch0, gr1ch1 */
751 fh = (FrameHeader *)(mp3DecInfo->FrameHeaderPS);
752 si = (SideInfo *)(mp3DecInfo->SideInfoPS);
753 hi = (HuffmanInfo*)(mp3DecInfo->HuffmanInfoPS);
754 mi = (IMDCTInfo *)(mp3DecInfo->IMDCTInfoPS);
755
756 /* anti-aliasing done on whole long blocks only
757 * for mixed blocks, nBfly always 1, except 3 for 8 kHz MPEG 2.5 (see sfBandTab)
758 * nLongBlocks = number of blocks with (possibly) non-zero power
759 * nBfly = number of butterflies to do (nLongBlocks - 1, unless no long blocks)
760 */
761 blockCutoff = fh->sfBand->l[(fh->ver == MPEG1 ? 8 : 6)] / 18; /* same as 3* num short sfb's in spec */
762 if (si->sis[gr][ch].blockType != 2) {
763 /* all long transforms */
764 bc.nBlocksLong = MIN((hi->nonZeroBound[ch] + 7) / 18 + 1, 32);
765 nBfly = bc.nBlocksLong - 1;
766 } else if (si->sis[gr][ch].blockType == 2 && si->sis[gr][ch].mixedBlock) {
767 /* mixed block - long transforms until cutoff, then short transforms */
768 bc.nBlocksLong = blockCutoff;
769 nBfly = bc.nBlocksLong - 1;
770 } else {
771 /* all short transforms */
772 bc.nBlocksLong = 0;
773 nBfly = 0;
774 }
775
776 AntiAlias(hi->huffDecBuf[ch], nBfly);
777 hi->nonZeroBound[ch] = MAX(hi->nonZeroBound[ch], (nBfly * 18) + 8);
778
779 ASSERT(hi->nonZeroBound[ch] <= MAX_NSAMP);
780
781 /* for readability, use a struct instead of passing a million parameters to HybridTransform() */
782 bc.nBlocksTotal = (hi->nonZeroBound[ch] + 17) / 18;
783 bc.nBlocksPrev = mi->numPrevIMDCT[ch];
784 bc.prevType = mi->prevType[ch];
785 bc.prevWinSwitch = mi->prevWinSwitch[ch];
786 bc.currWinSwitch = (si->sis[gr][ch].mixedBlock ? blockCutoff : 0); /* where WINDOW switches (not nec. transform) */
787 bc.gbIn = hi->gb[ch];
788
789 mi->numPrevIMDCT[ch] = HybridTransform(hi->huffDecBuf[ch], mi->overBuf[ch], mi->outBuf[ch], &si->sis[gr][ch], &bc);
790 mi->prevType[ch] = si->sis[gr][ch].blockType;
791 mi->prevWinSwitch[ch] = bc.currWinSwitch; /* 0 means not a mixed block (either all short or all long) */
792 mi->gb[ch] = bc.gbOut;
793
794 ASSERT(mi->numPrevIMDCT[ch] <= NBANDS);
795
796 /* output has gained 2 int bits */
797 return 0;
798}
799
800} // namespace third_party
801} // namespace fl
#define MIN(a, b)
Definition coder.h:64
#define NBANDS
Definition coder.h:107
#define CLIP_2N(y, n)
Definition coder.h:89
#define ASSERT(x)
Definition coder.h:56
#define BLOCK_SIZE
Definition coder.h:106
#define MAX(a, b)
Definition coder.h:60
struct _MP3DecInfo MP3DecInfo
@ MPEG1
Definition mp3dec.h:83
#define MAX_NSAMP
Definition mp3dec.h:79
static int32_t FreqInvertRescale(int32_t *y, int32_t *xPrev, int32_t blockIdx, int32_t es) FL_NOEXCEPT
Definition imdct.hpp:195
static int32_t c3_0
Definition imdct.hpp:479
struct fl::third_party::_IMDCTInfo IMDCTInfo
static const int32_t c9_2
Definition imdct.hpp:260
__inline int32_t MULSHIFT32(int32_t x, int32_t y) FL_NOEXCEPT
Multiply together two 32-bit numbers and return the top 32-bits of the result.
Definition assembly.h:503
static const int32_t c9_4
Definition imdct.hpp:262
struct fl::third_party::_HuffmanInfo HuffmanInfo
__inline int32_t FASTABS(int32_t x) FL_NOEXCEPT
Absolute value of x.
Definition assembly.h:513
static void WinPrevious(int32_t *xPrev, int32_t *xPrevWin, int32_t btPrev) FL_NOEXCEPT
Definition imdct.hpp:141
const int32_t imdctWin[4][36]
Definition trigtabs.hpp:107
struct fl::third_party::_FrameHeader FrameHeader
const int32_t csa[8][2]
Definition trigtabs.hpp:229
static int32_t HybridTransform(int32_t *xCurr, int32_t *xPrev, int32_t y[BLOCK_SIZE][NBANDS], SideInfoSub *sis, BlockCount *bc) FL_NOEXCEPT
Definition imdct.hpp:634
static int32_t c6[3]
Definition imdct.hpp:480
static const int32_t c9_1
Definition imdct.hpp:259
struct fl::third_party::_SideInfo SideInfo
struct fl::third_party::_BlockCount BlockCount
static __inline void idct9(int32_t *x) FL_NOEXCEPT
Definition imdct.hpp:272
int IMDCT(MP3DecInfo *mp3DecInfo, int gr, int ch) FL_NOEXCEPT
static int32_t IMDCT12x3(int32_t *xCurr, int32_t *xPrev, int32_t *y, int32_t btPrev, int32_t blockIdx, int32_t gb) FL_NOEXCEPT
Definition imdct.hpp:551
static const int32_t c9_0
Definition imdct.hpp:258
fl::i32 int32_t
Definition coder.h:220
static void AntiAlias(int32_t *x, int32_t nBfly) FL_NOEXCEPT
Definition imdct.hpp:80
struct fl::third_party::_SideInfoSub SideInfoSub
__inline int32_t CLZ(int32_t x) FL_NOEXCEPT
Leading zeros.
Definition assembly.h:527
static const int32_t c9_3
Definition imdct.hpp:261
static __inline void imdct12(int32_t *x, int32_t *out) FL_NOEXCEPT
Definition imdct.hpp:485
static const int32_t c18[9]
Definition imdct.hpp:267
int32_t fastWin36[18]
Definition imdct.hpp:342
static int32_t IMDCT36(int32_t *xCurr, int32_t *xPrev, int32_t *y, int32_t btCurr, int32_t btPrev, int32_t blockIdx, int32_t gb) FL_NOEXCEPT
Definition imdct.hpp:381
int32_t overBuf[MAX_NCHAN][MAX_NSAMP/2]
Definition coder.h:238
int32_t outBuf[MAX_NCHAN][BLOCK_SIZE][NBANDS]
Definition coder.h:237
int32_t prevType[MAX_NCHAN]
Definition coder.h:240
int32_t prevWinSwitch[MAX_NCHAN]
Definition coder.h:241
int32_t nonZeroBound[MAX_NCHAN]
Definition coder.h:217
int32_t gb[MAX_NCHAN]
Definition coder.h:218
int32_t huffDecBuf[MAX_NCHAN][MAX_NSAMP]
Definition coder.h:216
int32_t numPrevIMDCT[MAX_NCHAN]
Definition coder.h:239
int32_t gb[MAX_NCHAN]
Definition coder.h:242
const SFBandTable * sfBand
Definition coder.h:175
SideInfoSub sis[MAX_NGRAN][MAX_NCHAN]
Definition coder.h:200
Base definition for an LED controller.
Definition crgb.hpp:179
#define FL_NOEXCEPT