/* ***** BEGIN LICENSE BLOCK ***** * Source last modified: $Id: pns.c,v 1.2 2005/03/10 17:01:56 jrecker Exp $ * * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved. * * The contents of this file, and the files included with this file, * are subject to the current version of the RealNetworks Public * Source License (the "RPSL") available at * http://www.helixcommunity.org/content/rpsl unless you have licensed * the file under the current version of the RealNetworks Community * Source License (the "RCSL") available at * http://www.helixcommunity.org/content/rcsl, in which case the RCSL * will apply. You may also obtain the license terms directly from * RealNetworks. You may not use this file except in compliance with * the RPSL or, if you have a valid RCSL with RealNetworks applicable * to this file, the RCSL. Please see the applicable RPSL or RCSL for * the rights, obligations and limitations governing use of the * contents of the file. * * This file is part of the Helix DNA Technology. RealNetworks is the * developer of the Original Code and owns the copyrights in the * portions it created. * * This file, and the files included with this file, is distributed * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET * ENJOYMENT OR NON-INFRINGEMENT. * * Technology Compatibility Kit Test Suite(s) Location: * http://www.helixcommunity.org/content/tck * * Contributor(s): * * ***** END LICENSE BLOCK ***** */ /************************************************************************************** * Fixed-point HE-AAC decoder * Jon Recker (jrecker@real.com) * February 2005 * * pns.c - perceptual noise substitution **************************************************************************************/ #include "coder.h" #include "assembly.h" /************************************************************************************** * Function: Get32BitVal * * Description: generate 32-bit unsigned random number * * Inputs: last number calculated (seed, first time through) * * Outputs: new number, saved in *last * * Return: 32-bit number, uniformly distributed between [0, 2^32) * * Notes: uses simple linear congruential generator **************************************************************************************/ static unsigned int Get32BitVal(unsigned int *last) { unsigned int r = *last; /* use same coefs as MPEG reference code (classic LCG) * use unsigned multiply to force reliable wraparound behavior in C (mod 2^32) */ r = (1664525U * r) + 1013904223U; *last = r; return r; } #define NUM_ITER_INVSQRT 4 #define X0_COEF_2 0xc0000000 /* Q29: -2.0 */ #define X0_OFF_2 0x60000000 /* Q29: 3.0 */ #define Q26_3 0x0c000000 /* Q26: 3.0 */ /************************************************************************************** * Function: InvRootR * * Description: use Newton's method to solve for x = 1/sqrt(r) * * Inputs: r in Q30 format, range = [0.25, 1] (normalize inputs to this range) * * Outputs: none * * Return: x = Q29, range = (1, 2) * * Notes: guaranteed to converge and not overflow for any r in this range * * xn+1 = xn - f(xn)/f'(xn) * f(x) = 1/sqrt(r) - x = 0 (find root) * = 1/x^2 - r * f'(x) = -2/x^3 * * so xn+1 = xn/2 * (3 - r*xn^2) * * NUM_ITER_INVSQRT = 3, maxDiff = 1.3747e-02 * NUM_ITER_INVSQRT = 4, maxDiff = 3.9832e-04 **************************************************************************************/ static int InvRootR(int r) { int i, xn, t; /* use linear equation for initial guess * x0 = -2*r + 3 (so x0 always >= correct answer in range [0.25, 1)) * xn = Q29 (at every step) */ xn = (MULSHIFT32(r, X0_COEF_2) << 2) + X0_OFF_2; for (i = 0; i < NUM_ITER_INVSQRT; i++) { t = MULSHIFT32(xn, xn); /* Q26 = Q29*Q29 */ t = Q26_3 - (MULSHIFT32(r, t) << 2); /* Q26 = Q26 - (Q31*Q26 << 1) */ xn = MULSHIFT32(xn, t) << (6 - 1); /* Q29 = (Q29*Q26 << 6), and -1 for division by 2 */ } /* clip to range (1.0, 2.0) * (because of rounding, this can converge to xn slightly > 2.0 when r is near 0.25) */ if (xn >> 30) xn = (1 << 30) - 1; return xn; } /************************************************************************************** * Function: ScaleNoiseVector * * Description: apply scaling to vector of noise coefficients for one scalefactor band * * Inputs: unscaled coefficients * number of coefficients in vector (one scalefactor band of coefs) * scalefactor for this band (i.e. noise energy) * * Outputs: nVals coefficients in Q(FBITS_OUT_DQ_OFF) * * Return: guard bit mask (OR of abs value of all noise coefs) **************************************************************************************/ static int ScaleNoiseVector(int *coef, int nVals, int sf) { /* pow(2, i/4.0) for i = [0,1,2,3], format = Q30 */ static const int pow14[4] PROGMEM = { 0x40000000, 0x4c1bf829, 0x5a82799a, 0x6ba27e65 }; int i, c, spec, energy, sq, scalef, scalei, invSqrtEnergy, z, gbMask; energy = 0; for (i = 0; i < nVals; i++) { spec = coef[i]; /* max nVals = max SFB width = 96, so energy can gain < 2^7 bits in accumulation */ sq = (spec * spec) >> 8; /* spec*spec range = (-2^30, 2^30) */ energy += sq; } /* unless nVals == 1 (or the number generator is broken...), this should not happen */ if (energy == 0) return 0; /* coef[i] must = 0 for i = [0, nVals-1], so gbMask = 0 */ /* pow(2, sf/4) * pow(2, FBITS_OUT_DQ_OFF) */ scalef = pow14[sf & 0x3]; scalei = (sf >> 2) + FBITS_OUT_DQ_OFF; /* energy has implied factor of 2^-8 since we shifted the accumulator * normalize energy to range [0.25, 1.0), calculate 1/sqrt(1), and denormalize * i.e. divide input by 2^(30-z) and convert to Q30 * output of 1/sqrt(i) now has extra factor of 2^((30-z)/2) * for energy > 0, z is an even number between 0 and 28 * final scaling of invSqrtEnergy: * 2^(15 - z/2) to compensate for implicit 2^(30-z) factor in input * +4 to compensate for implicit 2^-8 factor in input */ z = CLZ(energy) - 2; /* energy has at least 2 leading zeros (see acc loop) */ z &= 0xfffffffe; /* force even */ invSqrtEnergy = InvRootR(energy << z); /* energy << z must be in range [0x10000000, 0x40000000] */ scalei -= (15 - z/2 + 4); /* nInt = 1/sqrt(energy) in Q29 */ /* normalize for final scaling */ z = CLZ(invSqrtEnergy) - 1; invSqrtEnergy <<= z; scalei -= (z - 3 - 2); /* -2 for scalef, z-3 for invSqrtEnergy */ scalef = MULSHIFT32(scalef, invSqrtEnergy); /* scalef (input) = Q30, invSqrtEnergy = Q29 * 2^z */ gbMask = 0; if (scalei < 0) { scalei = -scalei; if (scalei > 31) scalei = 31; for (i = 0; i < nVals; i++) { c = MULSHIFT32(coef[i], scalef) >> scalei; gbMask |= FASTABS(c); coef[i] = c; } } else { /* for scalei <= 16, no clipping possible (coef[i] is < 2^15 before scaling) * for scalei > 16, just saturate exponent (rare) * scalef is close to full-scale (since we normalized invSqrtEnergy) * remember, we are just producing noise here */ if (scalei > 16) scalei = 16; for (i = 0; i < nVals; i++) { c = MULSHIFT32(coef[i] << scalei, scalef); coef[i] = c; gbMask |= FASTABS(c); } } return gbMask; } /************************************************************************************** * Function: GenerateNoiseVector * * Description: create vector of noise coefficients for one scalefactor band * * Inputs: seed for number generator * number of coefficients to generate * * Outputs: buffer of nVals coefficients, range = [-2^15, 2^15) * updated seed for number generator * * Return: none **************************************************************************************/ static void GenerateNoiseVector(int *coef, int *last, int nVals) { int i; for (i = 0; i < nVals; i++) coef[i] = ((signed int)Get32BitVal((unsigned int *)last)) >> 16; } /************************************************************************************** * Function: CopyNoiseVector * * Description: copy vector of noise coefficients for one scalefactor band from L to R * * Inputs: buffer of left coefficients * number of coefficients to copy * * Outputs: buffer of right coefficients * * Return: none **************************************************************************************/ static void CopyNoiseVector(int *coefL, int *coefR, int nVals) { int i; for (i = 0; i < nVals; i++) coefR[i] = coefL[i]; } /************************************************************************************** * Function: PNS * * Description: apply perceptual noise substitution, if enabled (MPEG-4 only) * * Inputs: valid AACDecInfo struct * index of current channel * * Outputs: shaped noise in scalefactor bands where PNS is active * updated minimum guard bit count for this channel * * Return: 0 if successful, -1 if error **************************************************************************************/ int PNS(AACDecInfo *aacDecInfo, int ch) { int gp, sfb, win, width, nSamps, gb, gbMask; int *coef; const /*short*/ int *sfbTab; unsigned char *sfbCodeBook; short *scaleFactors; int msMaskOffset, checkCorr, genNew; unsigned char msMask; unsigned char *msMaskPtr; PSInfoBase *psi; ICSInfo *icsInfo; /* validate pointers */ if (!aacDecInfo || !aacDecInfo->psInfoBase) return -1; psi = (PSInfoBase *)(aacDecInfo->psInfoBase); icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]); if (!psi->pnsUsed[ch]) return 0; if (icsInfo->winSequence == 2) { sfbTab = sfBandTabShort + sfBandTabShortOffset[psi->sampRateIdx]; nSamps = NSAMPS_SHORT; } else { sfbTab = sfBandTabLong + sfBandTabLongOffset[psi->sampRateIdx]; nSamps = NSAMPS_LONG; } coef = psi->coef[ch]; sfbCodeBook = psi->sfbCodeBook[ch]; scaleFactors = psi->scaleFactors[ch]; checkCorr = (aacDecInfo->currBlockID == AAC_ID_CPE && psi->commonWin == 1 ? 1 : 0); gbMask = 0; for (gp = 0; gp < icsInfo->numWinGroup; gp++) { for (win = 0; win < icsInfo->winGroupLen[gp]; win++) { msMaskPtr = psi->msMaskBits + ((gp*icsInfo->maxSFB) >> 3); msMaskOffset = ((gp*icsInfo->maxSFB) & 0x07); msMask = (*msMaskPtr++) >> msMaskOffset; for (sfb = 0; sfb < icsInfo->maxSFB; sfb++) { width = sfbTab[sfb+1] - sfbTab[sfb]; if (sfbCodeBook[sfb] == 13) { if (ch == 0) { /* generate new vector, copy into ch 1 if it's possible that the channels will be correlated * if ch 1 has PNS enabled for this SFB but it's uncorrelated (i.e. ms_used == 0), * the copied values will be overwritten when we process ch 1 */ GenerateNoiseVector(coef, &psi->pnsLastVal, width); if (checkCorr && psi->sfbCodeBook[1][gp*icsInfo->maxSFB + sfb] == 13) CopyNoiseVector(coef, psi->coef[1] + (coef - psi->coef[0]), width); } else { /* generate new vector if no correlation between channels */ genNew = 1; if (checkCorr && psi->sfbCodeBook[0][gp*icsInfo->maxSFB + sfb] == 13) { if ( (psi->msMaskPresent == 1 && (msMask & 0x01)) || psi->msMaskPresent == 2 ) genNew = 0; } if (genNew) GenerateNoiseVector(coef, &psi->pnsLastVal, width); } gbMask |= ScaleNoiseVector(coef, width, psi->scaleFactors[ch][gp*icsInfo->maxSFB + sfb]); } coef += width; /* get next mask bit (should be branchless on ARM) */ msMask >>= 1; if (++msMaskOffset == 8) { msMask = *msMaskPtr++; msMaskOffset = 0; } } coef += (nSamps - sfbTab[icsInfo->maxSFB]); } sfbCodeBook += icsInfo->maxSFB; scaleFactors += icsInfo->maxSFB; } /* update guard bit count if necessary */ gb = CLZ(gbMask) - 1; if (psi->gbCurrent[ch] > gb) psi->gbCurrent[ch] = gb; return 0; }