d6/da2/imdct_8hpp_source.html

/* ***** BEGIN LICENSE BLOCK *****

 * Version: RCSL 1.0/RPSL 1.0

 *

 * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.

 *

 * The contents of this file, and the files included with this file, are

 * subject to the current version of the RealNetworks Public Source License

 * Version 1.0 (the "RPSL") available at

 * http://www.helixcommunity.org/content/rpsl unless you have licensed

 * the file under the RealNetworks Community Source License Version 1.0

 * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,

 * in which case the RCSL will apply. You may also obtain the license terms

 * directly from RealNetworks.  You may not use this file except in

 * compliance with the RPSL or, if you have a valid RCSL with RealNetworks

 * applicable to this file, the RCSL.  Please see the applicable RPSL or

 * RCSL for the rights, obligations and limitations governing use of the

 * contents of the file.

 *

 * This file is part of the Helix DNA Technology. RealNetworks is the

 * developer of the Original Code and owns the copyrights in the portions

 * it created.

 *

 * This file, and the files included with this file, is distributed and made

 * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER

 * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,

 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS

 * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.

 *

 * Technology Compatibility Kit Test Suite(s) Location:

 *    http://www.helixcommunity.org/content/tck

 *

 * Contributor(s):

 *

 * ***** END LICENSE BLOCK ***** */


/**************************************************************************************

 * Fixed-point MP3 decoder

 * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)

 * June 2003

 *

 * imdct.c - antialias, inverse transform (short/long/mixed), windowing,

 *             overlap-add, frequency inversion

 **************************************************************************************/


#include "coder.h"

#include "assembly.h"

#include "fl/stl/stdint.h"

#include "fl/stl/noexcept.h"


namespace fl {

namespace third_party {


/**************************************************************************************

 * Function:    AntiAlias

 *

 * Description: smooth transition across DCT block boundaries (every 18 coefficients)

 *

 * Inputs:      vector of dequantized coefficients, length = (nBfly+1) * 18

 *              number of "butterflies" to perform (one butterfly means one

 *                inter-block smoothing operation)

 *

 * Outputs:     updated coefficient vector x

 *

 * Return:      none

 *

 * Notes:       weighted average of opposite bands (pairwise) from the 8 samples

 *                before and after each block boundary

 *              nBlocks = (nonZeroBound + 7) / 18, since nZB is the first ZERO sample

 *                above which all other samples are also zero

 *              max gain per sample = 1.372

 *                MAX(i) (abs(csa[i][0]) + abs(csa[i][1]))

 *              bits gained = 0

 *              assume at least 1 guard bit in x[] to avoid overflow

 *                (should be guaranteed from dequant, and max gain from stproc * max

 *                 gain from AntiAlias < 2.0)

 **************************************************************************************/

// a little bit faster in RAM (< 1 ms per block)


static void AntiAlias(int32_t *x, int32_t nBfly) FL_NOEXCEPT
{
    /* Each butterfly mixes the 8 samples just below a block boundary with the
     * 8 samples just above it, using one coefficient pair from csa[] per
     * sample pair. csa = Q31, so MULSHIFT32 (top 32 bits of the product)
     * loses one bit, which the "* 2L" puts back.
     */
    for (int32_t remaining = nBfly; remaining > 0; remaining--) {
        x += 18;    /* x now points at the first sample above the boundary */

        for (int32_t j = 0; j < 8; j++) {
            const int32_t w0 = csa[j][0];
            const int32_t w1 = csa[j][1];
            const int32_t below = x[-1 - j];
            const int32_t above = x[j];

            x[-1 - j] = (MULSHIFT32(w0, below) - MULSHIFT32(w1, above)) * 2L;
            x[j]      = (MULSHIFT32(w0, above) + MULSHIFT32(w1, below)) * 2L;
        }
    }
}


/**************************************************************************************

 * Function:    WinPrevious

 *

 * Description: apply specified window to second half of previous IMDCT (overlap part)

 *

 * Inputs:      vector of 9 coefficients (xPrev)

 *              window type (0, 1, 2, 3)

 *

 * Outputs:     18 windowed output coefficients (gain 1 integer bit)

 *

 * Return:      none

 *

 * Notes:       produces 18 output samples from 9 input samples via symmetry

 *              all blocks gain at least 1 guard bit via window (long blocks get extra

 *                sign bit, short blocks can have one addition but max gain < 1.0)

 **************************************************************************************/


static void WinPrevious(int32_t *xPrev, int32_t *xPrevWin, int32_t btPrev) FL_NOEXCEPT
{
    int32_t k;

    /* mapping (see IMDCT12x3): xPrev[0-2] = sum[6-8], xPrev[3-8] = sum[12-17] */
    if (btPrev == 2) {
        /* previous block was short: overlap the stored short-block tails
         * using the first 12 window coefficients of window type 2
         */
        const int32_t *win = imdctWin[btPrev];

        for (k = 0; k < 3; k++)
            xPrevWin[0 + k] = MULSHIFT32(win[6 + k], xPrev[2 - k]) + MULSHIFT32(win[0 + k], xPrev[6 + k]);
        for (k = 0; k < 3; k++)
            xPrevWin[3 + k] = MULSHIFT32(win[9 + k], xPrev[0 + k]) + MULSHIFT32(win[3 + k], xPrev[8 - k]);
        for (k = 0; k < 3; k++)
            xPrevWin[6 + k] = MULSHIFT32(win[6 + k], xPrev[5 - k]);
        for (k = 0; k < 3; k++)
            xPrevWin[9 + k] = MULSHIFT32(win[9 + k], xPrev[3 + k]);

        /* last six outputs of a short-block overlap are always zero */
        for (k = 12; k < 18; k++)
            xPrevWin[k] = 0;
        return;
    }

    /* long-type window: each stored sample feeds two mirrored outputs,
     * weighted by the second half (entries 18..35) of the selected window
     */
    const int32_t *win = imdctWin[btPrev] + 18;
    for (k = 0; k < 9; k++) {
        const int32_t sample = xPrev[k];

        xPrevWin[k]      = MULSHIFT32(win[k], sample);
        xPrevWin[17 - k] = MULSHIFT32(win[17 - k], sample);
    }
}


/**************************************************************************************

 * Function:    FreqInvertRescale

 *

 * Description: do frequency inversion (odd samples of odd blocks) and rescale

 *                if necessary (extra guard bits added before IMDCT)

 *

 * Inputs:      output vector y (18 new samples, spaced NBANDS apart)

 *              previous sample vector xPrev (9 samples)

 *              index of current block

 *              number of extra shifts added before IMDCT (usually 0)

 *

 * Outputs:     inverted and rescaled (as necessary) outputs

 *              rescaled (as necessary) previous samples

 *

 * Return:      updated mOut (from new outputs y)

 **************************************************************************************/


/* Frequency-invert (negate odd samples of odd blocks) and, when es != 0,
 * undo the pre-IMDCT downscale on both the new outputs and the saved overlap.
 *
 *   y        - 18 output samples for this block, spaced NBANDS apart
 *   xPrev    - 9 saved overlap samples for this block (rescaled when es != 0)
 *   blockIdx - index of the block; odd blocks get frequency inversion
 *   es       - number of extra right shifts applied before the IMDCT
 *
 * Returns the OR of abs() of all rescaled outputs, or 0 when es == 0
 * (no rescale, so the caller's own mOut is already complete).
 *
 * All accesses use y[k*NBANDS] with k in [0, 17]; the pointer is never
 * advanced past the last sample of the block.
 */
static int32_t FreqInvertRescale(int32_t *y, int32_t *xPrev, int32_t blockIdx, int32_t es) FL_NOEXCEPT
{
    const bool invert = (blockIdx & 0x01) != 0;

    if (es == 0) {
        /* fast case - frequency invert only (no rescaling) */
        if (invert) {
            for (int32_t k = 1; k < 18; k += 2)
                y[k * NBANDS] = -y[k * NBANDS];
        }
        return 0;
    }

    /* undo pre-IMDCT scaling, clipping if necessary */
    int32_t mOut = 0;
    for (int32_t pair = 0; pair < 9; pair++) {
        int32_t *yEven = &y[(2 * pair + 0) * NBANDS];
        int32_t *yOdd  = &y[(2 * pair + 1) * NBANDS];
        int32_t d;

        d = *yEven;
        CLIP_2N(d, 31 - es);
        /* shift through unsigned so a set top bit is not a signed overflow */
        *yEven = static_cast<int>(static_cast<unsigned int>(d) << es);
        mOut |= FASTABS(*yEven);

        /* odd sample: negated first when this is an odd block */
        d = invert ? -*yOdd : *yOdd;
        CLIP_2N(d, 31 - es);
        *yOdd = static_cast<int>(static_cast<unsigned int>(d) << es);
        mOut |= FASTABS(*yOdd);

        /* one overlap sample is rescaled per pair of outputs (9 in total) */
        d = xPrev[pair];
        CLIP_2N(d, 31 - es);
        xPrev[pair] = static_cast<int>(static_cast<unsigned int>(d) << es);
    }

    return mOut;
}


/* format = Q31

 * #define M_PI 3.14159265358979323846

 * double u = 2.0 * M_PI / 9.0;

 * float c0 = sqrt(3.0) / 2.0;

 * float c1 = cos(u);

 * float c2 = cos(2*u);

 * float c3 = sin(u);

 * float c4 = sin(2*u);

 */


/* Q31 constants for idct9(); every value is below 2^31, so the plain hex
 * literal already has type int and needs no cast.
 */
static const int32_t c9_0 = 0x6ed9eba1;
static const int32_t c9_1 = 0x620dbe8b;
static const int32_t c9_2 = 0x163a1a7e;
static const int32_t c9_3 = 0x5246dd49;
static const int32_t c9_4 = 0x7e0e2e32;


/* format = Q31

 * cos(((0:8) + 0.5) * (pi/18))

 */


/* Q31 cosine table used by the last stage of IMDCT36(), one entry per index. */
static const int32_t c18[9] = {
    0x7f834ed0,     /* [0] */
    0x7ba3751d,     /* [1] */
    0x7401e4c1,     /* [2] */
    0x68d9f964,     /* [3] */
    0x5a82799a,     /* [4] */
    0x496af3e2,     /* [5] */
    0x36185aee,     /* [6] */
    0x2120fb83,     /* [7] */
    0x0b27eb5c,     /* [8] */
};


/* require at least 3 guard bits in x[] to ensure no overflow */


/* In-place 9-point inverse DCT on x[0..8]; IMDCT36() calls it once on the
 * even half and once on the odd half of its work buffer.
 * The c9_* constants are Q31 and MULSHIFT32 keeps the top 32 bits of the
 * product, so every product mN is used as (mN * 2L) to restore its scale.
 */
static __inline void idct9(int32_t *x) FL_NOEXCEPT

{

    int32_t a1, a2, a3, a4, a5, a6, a7, a8, a9;

    int32_t a10, a11, a12, a13, a14, a15, a16, a17, a18;

    int32_t a19, a20, a21, a22, a23, a24, a25, a26, a27;

    int32_t m1, m3, m5, m6, m7, m8, m9, m10, m11, m12;

    int32_t x0, x1, x2, x3, x4, x5, x6, x7, x8;


    /* load all nine inputs up front; x[] is overwritten only at the very end */
    x0 = x[0]; x1 = x[1]; x2 = x[2]; x3 = x[3]; x4 = x[4];

    x5 = x[5]; x6 = x[6]; x7 = x[7]; x8 = x[8];


    /* first stage: pairwise sums and differences of the inputs */
    a1 = x0 - x6;

    a2 = x1 - x5;

    a3 = x1 + x5;

    a4 = x2 - x4;

    a5 = x2 + x4;

    a6 = x2 + x8;

    a7 = x1 + x7;


    /* second stage: combinations of the first-stage terms */
    a8 = a6 - a5;       /* ie x[8] - x[4] */

    a9 = a3 - a7;       /* ie x[5] - x[7] */

    a10 = a2 - x7;      /* ie x[1] - x[5] - x[7] */

    a11 = a4 - x8;      /* ie x[2] - x[4] - x[8] */


    /* do the << 1 as constant shifts where mX is actually used (free, no stall or extra inst.) */

    /* the ten constant multiplies of the transform */
    m1 =  MULSHIFT32(c9_0, x3);

    m3 =  MULSHIFT32(c9_0, a10);

    m5 =  MULSHIFT32(c9_1, a5);

    m6 =  MULSHIFT32(c9_2, a6);

    m7 =  MULSHIFT32(c9_1, a8);

    m8 =  MULSHIFT32(c9_2, a5);

    m9 =  MULSHIFT32(c9_3, a9);

    m10 = MULSHIFT32(c9_4, a7);

    m11 = MULSHIFT32(c9_3, a3);

    m12 = MULSHIFT32(c9_4, a9);


    /* x[0] and x[6] are still the original inputs here (same values as x0, x6) */
    a12 = x[0] +  (x[6] >> 1);

    a13 = a12  +  (  m1 * 2L);

    a14 = a12  -  (  m1 * 2L);

    a15 = a1   +  ( a11 >> 1);

    a16 = ( m5 * 2L) + (m6 * 2L);

    a17 = ( m7 * 2L) - (m8 * 2L);

    a18 = a16 + a17;

    a19 = ( m9 * 2L) + (m10 * 2L);

    a20 = (m11 * 2L) - (m12 * 2L);


    /* final combinations feeding the nine outputs */
    a21 = a20 - a19;

    a22 = a13 + a16;

    a23 = a14 + a16;

    a24 = a14 + a17;

    a25 = a13 + a17;

    a26 = a14 - a18;

    a27 = a13 - a18;


    /* write the nine outputs back over the inputs */
    x0 = a22 + a19;         x[0] = x0;

    x1 = a15 + (m3 * 2L);   x[1] = x1;

    x2 = a24 + a20;         x[2] = x2;

    x3 = a26 - a21;         x[3] = x3;

    x4 = a1 - a11;          x[4] = x4;

    x5 = a27 + a21;         x[5] = x5;

    x6 = a25 - a20;         x[6] = x6;

    x7 = a15 - (m3 * 2L);   x[7] = x7;

    x8 = a23 - a19;         x[8] = x8;

}


/* let c(j) = cos(M_PI/36 * ((j)+0.5)), s(j) = sin(M_PI/36 * ((j)+0.5))

 * then fastWin[2*j+0] = c(j)*(s(j) + c(j)), j = [0, 8]

 *      fastWin[2*j+1] = c(j)*(s(j) - c(j))

 * format = Q30

 */


/* Fast-path window for IMDCT36(), Q30. Stored as nine (even, odd) pairs:
 * IMDCT36 reads entry 2*j for the low output and 2*j+1 for the mirrored
 * high output of pair j.
 * NOTE(review): non-const with external linkage in a header-style file -
 * confirm nothing else links against or writes it before tightening this.
 */
int32_t fastWin36[18] = {
    /* j = 0 */ 0x42aace8b, static_cast<int32_t>(0xc2e92724U),
    /* j = 1 */ 0x47311c28, static_cast<int32_t>(0xc95f619aU),
    /* j = 2 */ 0x4a868feb, static_cast<int32_t>(0xd0859d8cU),
    /* j = 3 */ 0x4c913b51, static_cast<int32_t>(0xd8243ea0U),
    /* j = 4 */ 0x4d413ccc, static_cast<int32_t>(0xe0000000U),
    /* j = 5 */ 0x4c913b51, static_cast<int32_t>(0xe7dbc161U),
    /* j = 6 */ 0x4a868feb, static_cast<int32_t>(0xef7a6275U),
    /* j = 7 */ 0x47311c28, static_cast<int32_t>(0xf6a09e67U),
    /* j = 8 */ 0x42aace8b, static_cast<int32_t>(0xfd16d8ddU),
};


/**************************************************************************************

 * Function:    IMDCT36

 *

 * Description: 36-point modified DCT, with windowing and overlap-add (50% overlap)

 *

 * Inputs:      vector of 18 coefficients (N/2 inputs produces N outputs, by symmetry)

 *              overlap part of last IMDCT (9 samples - see output comments)

 *              window type (0,1,2,3) of current and previous block

 *              current block index (for deciding whether to do frequency inversion)

 *              number of guard bits in input vector

 *

 * Outputs:     18 output samples, after windowing and overlap-add with last frame

 *              second half of (unwindowed) 36-point IMDCT - save for next time

 *                only save 9 xPrev samples, using symmetry (see WinPrevious())

 *

 * Notes:       this is Ken's hyper-fast algorithm, including symmetric sin window

 *                optimization, if applicable

 *              total number of multiplies, general case:

 *                2*10 (idct9) + 9 (last stage imdct) + 36 (for windowing) = 65

 *              total number of multiplies, btCurr == 0 && btPrev == 0:

 *                2*10 (idct9) + 9 (last stage imdct) + 18 (for windowing) = 47

 *

 *              blockType == 0 is by far the most common case, so it should be

 *                possible to use the fast path most of the time

 *              this is the fastest known algorithm for performing

 *                long IMDCT + windowing + overlap-add in MP3

 *

 * Return:      mOut (OR of abs(y) for all y calculated here)

 *

 * TODO:        optimize for ARM (reorder window coefs, ARM-style pointers in C,

 *                inline asm may or may not be helpful)

 **************************************************************************************/

// barely faster in RAM


/* 36-point IMDCT of one long block, fused with windowing, overlap-add and
 * frequency inversion.
 *
 *   xCurr    - 18 input coefficients for this block (read only)
 *   xPrev    - 9 saved overlap samples from the previous granule; replaced
 *              with the 9 overlap samples for the next granule
 *   y        - 18 output samples, written NBANDS apart
 *   btCurr   - window type of the current block
 *   btPrev   - window type of the previous block
 *   blockIdx - block index, forwarded to FreqInvertRescale()
 *   gb       - number of guard bits in xCurr
 *
 * Returns the OR of abs(y) for every output written here.
 *
 * All buffers are accessed by index, so no pointer is ever stepped to one
 * element before the start of xCurr, xBuf or c18.
 */
static int32_t IMDCT36(int32_t *xCurr, int32_t *xPrev, int32_t *y, int32_t btCurr, int32_t btPrev, int32_t blockIdx, int32_t gb) FL_NOEXCEPT
{
    int32_t i, es;
    int32_t xBuf[18], xPrevWin[18];
    int32_t acc1, acc2, s, d, t, mOut;
    int32_t xo, xe, yLo, yHi;
    const int32_t *wp;

    acc1 = acc2 = 0;

    /* Running alternating sums over the inputs, taken from the highest
     * index down; xBuf[0..8] receives the even part, xBuf[9..17] the odd part.
     * 7 gb is always adequate for antialias + accumulator loop + idct9
     */
    if (gb < 7) {
        /* rarely triggered - 5% to 10% of the time on normal clips (with Q25 input) */
        es = 7 - gb;
        for (i = 8; i >= 0; i--) {
            acc1 = (xCurr[2*i + 1] >> es) - acc1;
            acc2 = acc1 - acc2;
            acc1 = (xCurr[2*i + 0] >> es) - acc1;
            xBuf[i+9] = acc2;   /* odd */
            xBuf[i+0] = acc1;   /* even */
            xPrev[i] >>= es;    /* keep the overlap at the same scale as the inputs */
        }
    } else {
        es = 0;
        /* max gain = 18, assume adequate guard bits */
        for (i = 8; i >= 0; i--) {
            acc1 = xCurr[2*i + 1] - acc1;
            acc2 = acc1 - acc2;
            acc1 = xCurr[2*i + 0] - acc1;
            xBuf[i+9] = acc2;   /* odd */
            xBuf[i+0] = acc1;   /* even */
        }
    }

    /* xEven[0] and xOdd[0] scaled by 0.5 */
    xBuf[9] >>= 1;
    xBuf[0] >>= 1;

    /* do 9-point IDCT on even and odd */
    idct9(xBuf+0); /* even */
    idct9(xBuf+9); /* odd */

    mOut = 0;
    if (btPrev == 0 && btCurr == 0) {
        /* fast path - use symmetry of sin window to reduce windowing multiplies to 18 (N/2) */
        wp = fastWin36;
        for (i = 0; i < 9; i++) {
            /* gain 2 int bits here */
            xo = MULSHIFT32(c18[8 - i], xBuf[17 - i]);  /* 2*c18*xOdd (mul by 2 implicit in scaling)  */
            xe = xBuf[8 - i] >> 2;

            s = -xPrev[i];      /* sum from last block (always at least 2 guard bits) */
            d = -(xe - xo);     /* gain 2 int bits, don't shift xo (effective << 1 to eat sign bit, << 1 for mul by 2) */
            xPrev[i] = xe + xo; /* symmetry - xPrev[i] = xPrev[17-i] for long blocks */
            t = s - d;

            yLo = (d + (MULSHIFT32(t, wp[2*i + 0]) * 4L));
            yHi = (s + (MULSHIFT32(t, wp[2*i + 1]) * 4L));
            y[(i)*NBANDS]    = yLo;
            y[(17-i)*NBANDS] = yHi;
            mOut |= FASTABS(yLo);
            mOut |= FASTABS(yHi);
        }
    } else {
        /* slower method - either prev or curr is using window type != 0 so do full 36-point window
         * output xPrevWin has at least 3 guard bits (xPrev has 2, gain 1 in WinPrevious)
         */
        WinPrevious(xPrev, xPrevWin, btPrev);

        wp = imdctWin[btCurr];
        for (i = 0; i < 9; i++) {
            /* gain 2 int bits here */
            xo = MULSHIFT32(c18[8 - i], xBuf[17 - i]);  /* 2*c18*xOdd (mul by 2 implicit in scaling)  */
            xe = xBuf[8 - i] >> 2;

            d = xe - xo;
            xPrev[i] = xe + xo; /* symmetry - xPrev[i] = xPrev[17-i] for long blocks */

            yLo = (xPrevWin[i]    + MULSHIFT32(d, wp[i])) * 4L;
            yHi = (xPrevWin[17-i] + MULSHIFT32(d, wp[17-i])) * 4L;
            y[(i)*NBANDS]    = yLo;
            y[(17-i)*NBANDS] = yHi;
            mOut |= FASTABS(yLo);
            mOut |= FASTABS(yHi);
        }
    }

    /* xPrev was only indexed, never advanced, so it still addresses sample 0 */
    mOut |= FreqInvertRescale(y, xPrev, blockIdx, es);

    return mOut;
}


/* Read-only Q31 constants for imdct12(); const like the c9_x / c18 tables so
 * they cannot be modified by accident.
 */
static const int32_t c3_0 = 0x6ed9eba1;  /* format = Q31, cos(pi/6) */
static const int32_t c6[3] = { 0x7ba3751d, 0x5a82799a, 0x2120fb83 }; /* format = Q31, cos(((0:2) + 0.5) * (pi/6)) */


/* 12-point inverse DCT, used in IMDCT12x3()

 * 4 input guard bits will ensure no overflow

 */


/* One 12-point transform of a short block.
 * Reads six inputs at x[0], x[3], ..., x[15] (the three short blocks are
 * interleaved, so each block's samples are 3 apart) and writes six results
 * to out[0..5]. All inputs are read before anything is written.
 */
static __inline void imdct12 (int32_t *x, int32_t *out) FL_NOEXCEPT
{
    int32_t s0 = x[0];
    int32_t s1 = x[3];
    int32_t s2 = x[6];
    int32_t s3 = x[9];
    int32_t s4 = x[12];
    int32_t s5 = x[15];

    /* chained differences - the order of these statements matters */
    s4 -= s5;
    s3 -= s4;
    s2 -= s3;
    s3 -= s5;
    s1 -= s2;
    s0 -= s1;
    s1 -= s3;

    s0 >>= 1;
    s1 >>= 1;

    /* even part (c3_0 is Q31, "* 2L" restores the bit MULSHIFT32 drops) */
    const int32_t evenRot = MULSHIFT32(c3_0, s2) * 2L;
    const int32_t evenSum = s0 + (s4 >> 1);
    const int32_t e0 = evenSum + evenRot;
    const int32_t e1 = s0 - s4;
    const int32_t e2 = evenSum - evenRot;

    /* odd part */
    const int32_t oddRot = MULSHIFT32(c3_0, s3) * 2L;
    const int32_t oddSum = s1 + (s5 >> 1);
    const int32_t oddDif = s1 - s5;

    /* cos window odd samples, mul by 2, eat sign bit */
    const int32_t o0 = MULSHIFT32(c6[0], oddSum + oddRot) * 4L;
    const int32_t o1 = MULSHIFT32(c6[1], oddDif) * 4L;
    const int32_t o2 = MULSHIFT32(c6[2], oddSum - oddRot) * 4L;

    /* outputs: sums in the first half, mirrored differences in the second */
    out[0] = e0 + o0;
    out[1] = e1 + o1;
    out[2] = e2 + o2;
    out[3] = e2 - o2;
    out[4] = e1 - o1;
    out[5] = e0 - o0;
}


/**************************************************************************************

 * Function:    IMDCT12x3

 *

 * Description: three 12-point modified DCT's for short blocks, with windowing,

 *                short block concatenation, and overlap-add

 *

 * Inputs:      3 interleaved vectors of 6 samples each

 *                (block0[0], block1[0], block2[0], block0[1], block1[1]....)

 *              overlap part of last IMDCT (9 samples - see output comments)

 *              window type (0,1,2,3) of previous block

 *              current block index (for deciding whether to do frequency inversion)

 *              number of guard bits in input vector

 *

 * Outputs:     updated sample vector x, net gain of 1 integer bit

 *              second half of (unwindowed) IMDCT's - save for next time

 *                only save 9 xPrev samples, using symmetry (see WinPrevious())

 *

 * Return:      mOut (OR of abs(y) for all y calculated here)

 *

 * TODO:        optimize for ARM

 **************************************************************************************/

 // barely faster in RAM


/* Three 12-point IMDCTs for one short block, with windowing, concatenation
 * of the three short windows, overlap-add and frequency inversion.
 *
 *   xCurr    - 18 interleaved inputs (scaled in place when gb < 7)
 *   xPrev    - 9 saved overlap samples; replaced with the new overlap
 *   y        - 18 output samples, written NBANDS apart
 *   btPrev   - window type of the previous block
 *   blockIdx - block index, forwarded to FreqInvertRescale()
 *   gb       - number of guard bits in xCurr
 *
 * Returns the OR of abs(y) for every output written here.
 */
static int32_t IMDCT12x3(int32_t *xCurr, int32_t *xPrev, int32_t *y, int32_t btPrev, int32_t blockIdx, int32_t gb) FL_NOEXCEPT
{
    int32_t k;
    int32_t xBuf[18], xPrevWin[18]; /* need temp buffer for reordering short blocks */

    /* 7 gb is always adequate for accumulator loop + idct12 + window + overlap */
    int32_t es = 0;
    if (gb < 7) {
        es = 7 - gb;
        for (k = 0; k < 9; k++) {
            xCurr[2*k + 0] >>= es;
            xCurr[2*k + 1] >>= es;
            xPrev[k] >>= es;
        }
    }

    /* requires 4 input guard bits for each imdct12 */
    for (k = 0; k < 3; k++)
        imdct12(xCurr + k, xBuf + 6*k);

    /* window previous from last time */
    WinPrevious(xPrev, xPrevWin, btPrev);

    /* could unroll this for speed, minimum loads (short blocks usually rare, so doesn't make much overall difference)
     * xPrevWin[i] << 2 still has 1 gb always, max gain of windowed xBuf stuff also < 1.0 and gain the sign bit
     * so y calculations won't overflow
     */
    const int32_t *wp = imdctWin[2];
    int32_t mOut = 0;
    for (k = 0; k < 3; k++) {
        /* samples 0-5: overlap from the previous block only */
        const int32_t out0 = (xPrevWin[ 0+k] * 4L);
        const int32_t out3 = (xPrevWin[ 3+k] * 4L);
        /* samples 6-11: overlap plus the first short transform */
        const int32_t out6 = (xPrevWin[ 6+k] * 4L) + (MULSHIFT32(wp[0+k], xBuf[3+k]));
        const int32_t out9 = (xPrevWin[ 9+k] * 4L) + (MULSHIFT32(wp[3+k], xBuf[5-k]));
        /* samples 12-17: overlap plus the first and second short transforms */
        const int32_t out12 = (xPrevWin[12+k] * 4L) + (MULSHIFT32(wp[6+k], xBuf[2-k]) + MULSHIFT32(wp[0+k], xBuf[(6+3)+k]));
        const int32_t out15 = (xPrevWin[15+k] * 4L) + (MULSHIFT32(wp[9+k], xBuf[0+k]) + MULSHIFT32(wp[3+k], xBuf[(6+5)-k]));

        y[( 0+k)*NBANDS] = out0;
        y[( 3+k)*NBANDS] = out3;
        y[( 6+k)*NBANDS] = out6;
        y[( 9+k)*NBANDS] = out9;
        y[(12+k)*NBANDS] = out12;
        y[(15+k)*NBANDS] = out15;

        mOut |= FASTABS(out0);
        mOut |= FASTABS(out3);
        mOut |= FASTABS(out6);
        mOut |= FASTABS(out9);
        mOut |= FASTABS(out12);
        mOut |= FASTABS(out15);
    }

    /* save previous (unwindowed) for overlap - only need samples 6-8, 12-17 */
    for (k = 0; k < 3; k++)
        xPrev[k] = xBuf[6 + k] >> 2;
    for (k = 0; k < 6; k++)
        xPrev[3 + k] = xBuf[12 + k] >> 2;

    mOut |= FreqInvertRescale(y, xPrev, blockIdx, es);

    return mOut;
}


/**************************************************************************************

 * Function:    HybridTransform

 *

 * Description: IMDCT's, windowing, and overlap-add on long/short/mixed blocks

 *

 * Inputs:      vector of input coefficients, length = nBlocksTotal * 18

 *              vector of overlap samples from last time, length = nBlocksPrev * 9

 *              buffer for output samples, length = MAXNSAMP

 *              SideInfoSub struct for this granule/channel

 *              BlockCount struct with necessary info

 *                number of non-zero input and overlap blocks

 *                number of long blocks in input vector (rest assumed to be short blocks)

 *                number of blocks which use long window (type) 0 in case of mixed block

 *                  (bc->currWinSwitch, 0 for non-mixed blocks)

 *

 * Outputs:     transformed, windowed, and overlapped sample buffer

 *              does frequency inversion on odd blocks

 *              updated buffer of samples for overlap

 *

 * Return:      number of non-zero IMDCT blocks calculated in this call

 *                (including overlap-add)

 *

 * TODO:        examine mixedBlock/winSwitch logic carefully (test he_mode.bit)

 **************************************************************************************/


/* Run the IMDCT stage over every block of one granule/channel.
 *
 *   xCurr - input coefficients, 18 per block
 *   xPrev - overlap samples from the previous granule, 9 per block; updated
 *   y     - output samples, y[sample][block]
 *   sis   - side info for this granule/channel (blockType, mixedBlock)
 *   bc    - block counts, window-switch points and input guard bits;
 *           bc->gbOut is written on return
 *
 * Four phases, sharing one running block index i:
 *   1. long blocks   [0, nBlocksLong)             -> IMDCT36
 *   2. short blocks  [nBlocksLong, nBlocksTotal)  -> IMDCT12x3
 *   3. blocks that only have overlap from last time (up to nBlocksPrev)
 *   4. remaining blocks up to NBANDS are cleared
 *
 * Returns nBlocksOut as computed below.
 */
static int32_t HybridTransform(int32_t *xCurr, int32_t *xPrev, int32_t y[BLOCK_SIZE][NBANDS], SideInfoSub *sis, BlockCount *bc) FL_NOEXCEPT
{
    int32_t xPrevWin[18];
    int32_t currWinIdx, prevWinIdx;
    int32_t i, j, nBlocksOut, nonZero, mOut;
    int32_t xp;

    ASSERT(bc->nBlocksLong  <= NBANDS);
    ASSERT(bc->nBlocksTotal <= NBANDS);
    ASSERT(bc->nBlocksPrev  <= NBANDS);

    mOut = 0;

    /* do long blocks, if any */
    for (i = 0; i < bc->nBlocksLong; i++) {
        /* currWinIdx picks the right window for long blocks (if mixed, long blocks use window type 0) */
        currWinIdx = sis->blockType;
        if (sis->mixedBlock && i < bc->currWinSwitch)
            currWinIdx = 0;

        prevWinIdx = bc->prevType;
        if (i < bc->prevWinSwitch)
            prevWinIdx = 0;

        /* do 36-point IMDCT, including windowing and overlap-add */
        mOut |= IMDCT36(xCurr, xPrev, &(y[0][i]), currWinIdx, prevWinIdx, i, bc->gbIn);
        xCurr += 18;
        xPrev += 9;
    }

    /* do short blocks (if any) */
    for (   ; i < bc->nBlocksTotal; i++) {
        ASSERT(sis->blockType == 2);

        prevWinIdx = bc->prevType;
        if (i < bc->prevWinSwitch)
            prevWinIdx = 0;

        mOut |= IMDCT12x3(xCurr, xPrev, &(y[0][i]), prevWinIdx, i, bc->gbIn);
        xCurr += 18;
        xPrev += 9;
    }
    nBlocksOut = i;

    /* window and overlap prev if prev longer than current */
    for (   ; i < bc->nBlocksPrev; i++) {
        prevWinIdx = bc->prevType;
        if (i < bc->prevWinSwitch)
            prevWinIdx = 0;
        WinPrevious(xPrev, xPrevWin, prevWinIdx);

        nonZero = 0;
        /* sign_bit = -1 for odd i, 0 for even i */
        const int32_t sign_bit = (i & 1) ? static_cast<int32_t>(-1) : 0;
        for (j = 0; j < 9; j++) {
            xp = xPrevWin[2*j+0] * 4L;  /* * 4 temp for scaling */
            nonZero |= xp;
            y[2*j+0][i] = xp;
            mOut |= FASTABS(xp);

            /* frequency inversion on odd blocks/odd samples (flip sign if i odd, j odd):
             * (xp ^ -1) + 1 is the two's-complement negation; (xp ^ 0) + 0 leaves xp unchanged
             */
            xp = xPrevWin[2*j+1] * 4L;
            xp = (xp ^ sign_bit) + (i & 0x01);
            nonZero |= xp;
            y[2*j+1][i] = xp;
            mOut |= FASTABS(xp);

            /* overlap has been consumed; nothing carries over from this block */
            xPrev[j] = 0;
        }
        xPrev += 9;
        if (nonZero)
            nBlocksOut = i;
    }

    /* clear rest of blocks, up to the NBANDS column dimension of y */
    for (   ; i < NBANDS; i++) {
        for (j = 0; j < 18; j++)
            y[j][i] = 0;
    }

    /* NOTE(review): presumably CLZ counts leading zero bits, so this is the
     * output guard-bit count excluding the sign bit - confirm in assembly.h
     */
    bc->gbOut = CLZ(mOut) - 1;

    return nBlocksOut;
}


/**************************************************************************************

 * Function:    IMDCT

 *

 * Description: do alias reduction, inverse MDCT, overlap-add, and frequency inversion

 *

 * Inputs:      MP3DecInfo structure filled by UnpackFrameHeader(), UnpackSideInfo(),

 *                UnpackScaleFactors(), and DecodeHuffman() (for this granule, channel)

 *                includes PCM samples in overBuf (from last call to IMDCT) for OLA

 *              index of current granule and channel

 *

 * Outputs:     PCM samples in outBuf, for input to subband transform

 *              PCM samples in overBuf, for OLA next time

 *              updated hi->nonZeroBound index for this channel

 *

 * Return:      0 on success,  -1 if null input pointers

 **************************************************************************************/

 // a bit faster in RAM


/* Alias reduction, inverse MDCT, overlap-add and frequency inversion for one
 * granule/channel.
 *
 *   mp3DecInfo - decoder state; its FrameHeaderPS, SideInfoPS, HuffmanInfoPS
 *                and IMDCTInfoPS sub-structures must all be non-null
 *   gr, ch     - granule and channel index (NOTE(review): not range-checked
 *                here - presumably validated by the caller)
 *
 * Updates hi->nonZeroBound[ch] and the per-channel IMDCT state
 * (numPrevIMDCT, prevType, prevWinSwitch, gb). Output samples and the new
 * overlap are written through HybridTransform().
 *
 * Returns 0 on success, -1 if any required pointer is null.
 */
int32_t IMDCT(MP3DecInfo *mp3DecInfo, int32_t gr, int32_t ch) FL_NOEXCEPT
{
    int32_t nBfly, blockCutoff;
    FrameHeader *fh;
    SideInfo *si;
    HuffmanInfo *hi;
    IMDCTInfo *mi;
    SideInfoSub *sis;
    BlockCount bc;

    /* validate pointers */
    if (!mp3DecInfo || !mp3DecInfo->FrameHeaderPS || !mp3DecInfo->SideInfoPS ||
        !mp3DecInfo->HuffmanInfoPS || !mp3DecInfo->IMDCTInfoPS)
        return -1;

    /* si is an array of up to 4 structs, stored as gr0ch0, gr0ch1, gr1ch0, gr1ch1 */
    fh = (FrameHeader *)(mp3DecInfo->FrameHeaderPS);
    si = (SideInfo *)(mp3DecInfo->SideInfoPS);
    hi = (HuffmanInfo*)(mp3DecInfo->HuffmanInfoPS);
    mi = (IMDCTInfo *)(mp3DecInfo->IMDCTInfoPS);

    /* side info for this granule/channel, used throughout */
    sis = &si->sis[gr][ch];

    /* anti-aliasing done on whole long blocks only
     * for mixed blocks, nBfly always 1, except 3 for 8 kHz MPEG 2.5 (see sfBandTab)
     *   nLongBlocks = number of blocks with (possibly) non-zero power
     *   nBfly = number of butterflies to do (nLongBlocks - 1, unless no long blocks)
     */
    blockCutoff = fh->sfBand->l[(fh->ver == MPEG1 ? 8 : 6)] / 18;    /* same as 3* num short sfb's in spec */
    if (sis->blockType != 2) {
        /* all long transforms (capped at NBANDS blocks) */
        bc.nBlocksLong = MIN((hi->nonZeroBound[ch] + 7) / 18 + 1, NBANDS);
        nBfly = bc.nBlocksLong - 1;
    } else if (sis->mixedBlock) {
        /* mixed block - long transforms until cutoff, then short transforms */
        bc.nBlocksLong = blockCutoff;
        nBfly = bc.nBlocksLong - 1;
    } else {
        /* all short transforms */
        bc.nBlocksLong = 0;
        nBfly = 0;
    }

    AntiAlias(hi->huffDecBuf[ch], nBfly);
    /* each butterfly touches 8 samples above its boundary, which may now be non-zero */
    hi->nonZeroBound[ch] = MAX(hi->nonZeroBound[ch], (nBfly * 18) + 8);

    ASSERT(hi->nonZeroBound[ch] <= MAX_NSAMP);

    /* for readability, use a struct instead of passing a million parameters to HybridTransform() */
    bc.nBlocksTotal = (hi->nonZeroBound[ch] + 17) / 18;
    bc.nBlocksPrev = mi->numPrevIMDCT[ch];
    bc.prevType = mi->prevType[ch];
    bc.prevWinSwitch = mi->prevWinSwitch[ch];
    bc.currWinSwitch = (sis->mixedBlock ? blockCutoff : 0);    /* where WINDOW switches (not nec. transform) */
    bc.gbIn = hi->gb[ch];

    /* remember this granule's state for the next call's overlap-add */
    mi->numPrevIMDCT[ch] = HybridTransform(hi->huffDecBuf[ch], mi->overBuf[ch], mi->outBuf[ch], sis, &bc);
    mi->prevType[ch] = sis->blockType;
    mi->prevWinSwitch[ch] = bc.currWinSwitch;     /* 0 means not a mixed block (either all short or all long) */
    mi->gb[ch] = bc.gbOut;

    ASSERT(mi->numPrevIMDCT[ch] <= NBANDS);

    /* output has gained 2 int bits */
    return 0;
}


}  // namespace third_party

}  // namespace fl

assembly.h

MIN
#define MIN(a, b)
Definition coder.h:64

NBANDS
#define NBANDS
Definition coder.h:107

CLIP_2N
#define CLIP_2N(y, n)
Definition coder.h:89

ASSERT
#define ASSERT(x)
Definition coder.h:56

BLOCK_SIZE
#define BLOCK_SIZE
Definition coder.h:106

MAX
#define MAX(a, b)
Definition coder.h:60

coder.h

MP3DecInfo
struct _MP3DecInfo MP3DecInfo

MPEG1
@ MPEG1
Definition mp3dec.h:83

MAX_NSAMP
#define MAX_NSAMP
Definition mp3dec.h:79

fl::third_party::FreqInvertRescale
static int32_t FreqInvertRescale(int32_t *y, int32_t *xPrev, int32_t blockIdx, int32_t es) FL_NOEXCEPT
Definition imdct.hpp:195

fl::third_party::c3_0
static int32_t c3_0
Definition imdct.hpp:479

fl::third_party::IMDCTInfo
struct fl::third_party::_IMDCTInfo IMDCTInfo

fl::third_party::c9_2
static const int32_t c9_2
Definition imdct.hpp:260

fl::third_party::MULSHIFT32
__inline int32_t MULSHIFT32(int32_t x, int32_t y) FL_NOEXCEPT
Multiply together two 32-bit numbers and return the top 32-bits of the result.
Definition assembly.h:503

fl::third_party::c9_4
static const int32_t c9_4
Definition imdct.hpp:262

fl::third_party::HuffmanInfo
struct fl::third_party::_HuffmanInfo HuffmanInfo

fl::third_party::FASTABS
__inline int32_t FASTABS(int32_t x) FL_NOEXCEPT
Absolute value of x.
Definition assembly.h:513

fl::third_party::WinPrevious
static void WinPrevious(int32_t *xPrev, int32_t *xPrevWin, int32_t btPrev) FL_NOEXCEPT
Definition imdct.hpp:141

fl::third_party::imdctWin
const int32_t imdctWin[4][36]
Definition trigtabs.hpp:107

fl::third_party::FrameHeader
struct fl::third_party::_FrameHeader FrameHeader

fl::third_party::csa
const int32_t csa[8][2]
Definition trigtabs.hpp:229

fl::third_party::HybridTransform
static int32_t HybridTransform(int32_t *xCurr, int32_t *xPrev, int32_t y[BLOCK_SIZE][NBANDS], SideInfoSub *sis, BlockCount *bc) FL_NOEXCEPT
Definition imdct.hpp:634

fl::third_party::c6
static int32_t c6[3]
Definition imdct.hpp:480

fl::third_party::c9_1
static const int32_t c9_1
Definition imdct.hpp:259

fl::third_party::SideInfo
struct fl::third_party::_SideInfo SideInfo

fl::third_party::BlockCount
struct fl::third_party::_BlockCount BlockCount

fl::third_party::idct9
static __inline void idct9(int32_t *x) FL_NOEXCEPT
Definition imdct.hpp:272

fl::third_party::IMDCT
int IMDCT(MP3DecInfo *mp3DecInfo, int gr, int ch) FL_NOEXCEPT

fl::third_party::IMDCT12x3
static int32_t IMDCT12x3(int32_t *xCurr, int32_t *xPrev, int32_t *y, int32_t btPrev, int32_t blockIdx, int32_t gb) FL_NOEXCEPT
Definition imdct.hpp:551

fl::third_party::c9_0
static const int32_t c9_0
Definition imdct.hpp:258

fl::third_party::int32_t
fl::i32 int32_t
Definition coder.h:220

fl::third_party::AntiAlias
static void AntiAlias(int32_t *x, int32_t nBfly) FL_NOEXCEPT
Definition imdct.hpp:80

fl::third_party::SideInfoSub
struct fl::third_party::_SideInfoSub SideInfoSub

fl::third_party::CLZ
__inline int32_t CLZ(int32_t x) FL_NOEXCEPT
Leading zeros.
Definition assembly.h:527

fl::third_party::c9_3
static const int32_t c9_3
Definition imdct.hpp:261

fl::third_party::imdct12
static __inline void imdct12(int32_t *x, int32_t *out) FL_NOEXCEPT
Definition imdct.hpp:485

fl::third_party::c18
static const int32_t c18[9]
Definition imdct.hpp:267

fl::third_party::fastWin36
int32_t fastWin36[18]
Definition imdct.hpp:342

fl::third_party::IMDCT36
static int32_t IMDCT36(int32_t *xCurr, int32_t *xPrev, int32_t *y, int32_t btCurr, int32_t btPrev, int32_t blockIdx, int32_t gb) FL_NOEXCEPT
Definition imdct.hpp:381

fl::third_party
Definition mp3.cpp.hpp:12

fl::third_party::_BlockCount::gbOut
int32_t gbOut
Definition coder.h:253

fl::third_party::_BlockCount::nBlocksLong
int32_t nBlocksLong
Definition coder.h:246

fl::third_party::_IMDCTInfo::overBuf
int32_t overBuf[MAX_NCHAN][MAX_NSAMP/2]
Definition coder.h:238

fl::third_party::_BlockCount::currWinSwitch
int32_t currWinSwitch
Definition coder.h:251

fl::third_party::_FrameHeader::ver
MPEGVersion ver
Definition coder.h:161

fl::third_party::_IMDCTInfo::outBuf
int32_t outBuf[MAX_NCHAN][BLOCK_SIZE][NBANDS]
Definition coder.h:237

fl::third_party::_IMDCTInfo::prevType
int32_t prevType[MAX_NCHAN]
Definition coder.h:240

fl::third_party::_IMDCTInfo::prevWinSwitch
int32_t prevWinSwitch[MAX_NCHAN]
Definition coder.h:241

fl::third_party::_SideInfoSub::blockType
int32_t blockType
Definition coder.h:184

fl::third_party::_HuffmanInfo::nonZeroBound
int32_t nonZeroBound[MAX_NCHAN]
Definition coder.h:217

fl::third_party::_HuffmanInfo::gb
int32_t gb[MAX_NCHAN]
Definition coder.h:218

fl::third_party::_HuffmanInfo::huffDecBuf
int32_t huffDecBuf[MAX_NCHAN][MAX_NSAMP]
Definition coder.h:216

fl::third_party::_BlockCount::prevWinSwitch
int32_t prevWinSwitch
Definition coder.h:250

fl::third_party::_IMDCTInfo::numPrevIMDCT
int32_t numPrevIMDCT[MAX_NCHAN]
Definition coder.h:239

fl::third_party::_IMDCTInfo::gb
int32_t gb[MAX_NCHAN]
Definition coder.h:242

fl::third_party::_BlockCount::nBlocksTotal
int32_t nBlocksTotal
Definition coder.h:247

fl::third_party::_FrameHeader::sfBand
const SFBandTable * sfBand
Definition coder.h:175

fl::third_party::_SideInfoSub::mixedBlock
int32_t mixedBlock
Definition coder.h:185

fl::third_party::_SideInfo::sis
SideInfoSub sis[MAX_NGRAN][MAX_NCHAN]
Definition coder.h:200

fl::third_party::_BlockCount::nBlocksPrev
int32_t nBlocksPrev
Definition coder.h:248

fl::third_party::_BlockCount::prevType
int32_t prevType
Definition coder.h:249

fl::third_party::_SFBandTable::l
short l[23]
Definition mp3common.h:104

fl::third_party::_BlockCount::gbIn
int32_t gbIn
Definition coder.h:252

fl::x
x
Definition transposition.cpp.hpp:24

fl::y
y
Definition transposition.cpp.hpp:23

fl::t
t
Definition transposition.cpp.hpp:27

fl
Base definition for an LED controller.
Definition crgb.hpp:179

noexcept.h

FL_NOEXCEPT
#define FL_NOEXCEPT

stdint.h