pns.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. /* ***** BEGIN LICENSE BLOCK *****
  2. * Source last modified: $Id: pns.c,v 1.2 2005/03/10 17:01:56 jrecker Exp $
  3. *
  4. * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
  5. *
  6. * The contents of this file, and the files included with this file,
  7. * are subject to the current version of the RealNetworks Public
  8. * Source License (the "RPSL") available at
  9. * http://www.helixcommunity.org/content/rpsl unless you have licensed
  10. * the file under the current version of the RealNetworks Community
  11. * Source License (the "RCSL") available at
  12. * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
  13. * will apply. You may also obtain the license terms directly from
  14. * RealNetworks. You may not use this file except in compliance with
  15. * the RPSL or, if you have a valid RCSL with RealNetworks applicable
  16. * to this file, the RCSL. Please see the applicable RPSL or RCSL for
  17. * the rights, obligations and limitations governing use of the
  18. * contents of the file.
  19. *
  20. * This file is part of the Helix DNA Technology. RealNetworks is the
  21. * developer of the Original Code and owns the copyrights in the
  22. * portions it created.
  23. *
  24. * This file, and the files included with this file, is distributed
  25. * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
  26. * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
  27. * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
  28. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
  29. * ENJOYMENT OR NON-INFRINGEMENT.
  30. *
  31. * Technology Compatibility Kit Test Suite(s) Location:
  32. * http://www.helixcommunity.org/content/tck
  33. *
  34. * Contributor(s):
  35. *
  36. * ***** END LICENSE BLOCK ***** */
  37. /**************************************************************************************
  38. * Fixed-point HE-AAC decoder
  39. * Jon Recker (jrecker@real.com)
  40. * February 2005
  41. *
  42. * pns.c - perceptual noise substitution
  43. **************************************************************************************/
  44. #include "coder.h"
  45. #include "assembly.h"
  46. /**************************************************************************************
  47. * Function: Get32BitVal
  48. *
  49. * Description: generate 32-bit unsigned random number
  50. *
  51. * Inputs: last number calculated (seed, first time through)
  52. *
  53. * Outputs: new number, saved in *last
  54. *
  55. * Return: 32-bit number, uniformly distributed between [0, 2^32)
  56. *
  57. * Notes: uses simple linear congruential generator
  58. **************************************************************************************/
  59. static unsigned int Get32BitVal(unsigned int *last)
  60. {
  61. unsigned int r = *last;
  62. /* use same coefs as MPEG reference code (classic LCG)
  63. * use unsigned multiply to force reliable wraparound behavior in C (mod 2^32)
  64. */
  65. r = (1664525U * r) + 1013904223U;
  66. *last = r;
  67. return r;
  68. }
  69. #define NUM_ITER_INVSQRT 4
  70. #define X0_COEF_2 0xc0000000 /* Q29: -2.0 */
  71. #define X0_OFF_2 0x60000000 /* Q29: 3.0 */
  72. #define Q26_3 0x0c000000 /* Q26: 3.0 */
  73. /**************************************************************************************
  74. * Function: InvRootR
  75. *
  76. * Description: use Newton's method to solve for x = 1/sqrt(r)
  77. *
  78. * Inputs: r in Q30 format, range = [0.25, 1] (normalize inputs to this range)
  79. *
  80. * Outputs: none
  81. *
  82. * Return: x = Q29, range = (1, 2)
  83. *
  84. * Notes: guaranteed to converge and not overflow for any r in this range
  85. *
  86. * xn+1 = xn - f(xn)/f'(xn)
  87. * f(x) = 1/sqrt(r) - x = 0 (find root)
  88. * = 1/x^2 - r
  89. * f'(x) = -2/x^3
  90. *
  91. * so xn+1 = xn/2 * (3 - r*xn^2)
  92. *
  93. * NUM_ITER_INVSQRT = 3, maxDiff = 1.3747e-02
  94. * NUM_ITER_INVSQRT = 4, maxDiff = 3.9832e-04
  95. **************************************************************************************/
  96. static int InvRootR(int r)
  97. {
  98. int i, xn, t;
  99. /* use linear equation for initial guess
  100. * x0 = -2*r + 3 (so x0 always >= correct answer in range [0.25, 1))
  101. * xn = Q29 (at every step)
  102. */
  103. xn = (MULSHIFT32(r, X0_COEF_2) << 2) + X0_OFF_2;
  104. for (i = 0; i < NUM_ITER_INVSQRT; i++) {
  105. t = MULSHIFT32(xn, xn); /* Q26 = Q29*Q29 */
  106. t = Q26_3 - (MULSHIFT32(r, t) << 2); /* Q26 = Q26 - (Q31*Q26 << 1) */
  107. xn = MULSHIFT32(xn, t) << (6 - 1); /* Q29 = (Q29*Q26 << 6), and -1 for division by 2 */
  108. }
  109. /* clip to range (1.0, 2.0)
  110. * (because of rounding, this can converge to xn slightly > 2.0 when r is near 0.25)
  111. */
  112. if (xn >> 30)
  113. xn = (1 << 30) - 1;
  114. return xn;
  115. }
  116. /**************************************************************************************
  117. * Function: ScaleNoiseVector
  118. *
  119. * Description: apply scaling to vector of noise coefficients for one scalefactor band
  120. *
  121. * Inputs: unscaled coefficients
  122. * number of coefficients in vector (one scalefactor band of coefs)
  123. * scalefactor for this band (i.e. noise energy)
  124. *
  125. * Outputs: nVals coefficients in Q(FBITS_OUT_DQ_OFF)
  126. *
  127. * Return: guard bit mask (OR of abs value of all noise coefs)
  128. **************************************************************************************/
  129. static int ScaleNoiseVector(int *coef, int nVals, int sf)
  130. {
  131. /* pow(2, i/4.0) for i = [0,1,2,3], format = Q30 */
  132. static const int pow14[4] PROGMEM = {
  133. 0x40000000, 0x4c1bf829, 0x5a82799a, 0x6ba27e65
  134. };
  135. int i, c, spec, energy, sq, scalef, scalei, invSqrtEnergy, z, gbMask;
  136. energy = 0;
  137. for (i = 0; i < nVals; i++) {
  138. spec = coef[i];
  139. /* max nVals = max SFB width = 96, so energy can gain < 2^7 bits in accumulation */
  140. sq = (spec * spec) >> 8; /* spec*spec range = (-2^30, 2^30) */
  141. energy += sq;
  142. }
  143. /* unless nVals == 1 (or the number generator is broken...), this should not happen */
  144. if (energy == 0)
  145. return 0; /* coef[i] must = 0 for i = [0, nVals-1], so gbMask = 0 */
  146. /* pow(2, sf/4) * pow(2, FBITS_OUT_DQ_OFF) */
  147. scalef = pow14[sf & 0x3];
  148. scalei = (sf >> 2) + FBITS_OUT_DQ_OFF;
  149. /* energy has implied factor of 2^-8 since we shifted the accumulator
  150. * normalize energy to range [0.25, 1.0), calculate 1/sqrt(1), and denormalize
  151. * i.e. divide input by 2^(30-z) and convert to Q30
  152. * output of 1/sqrt(i) now has extra factor of 2^((30-z)/2)
  153. * for energy > 0, z is an even number between 0 and 28
  154. * final scaling of invSqrtEnergy:
  155. * 2^(15 - z/2) to compensate for implicit 2^(30-z) factor in input
  156. * +4 to compensate for implicit 2^-8 factor in input
  157. */
  158. z = CLZ(energy) - 2; /* energy has at least 2 leading zeros (see acc loop) */
  159. z &= 0xfffffffe; /* force even */
  160. invSqrtEnergy = InvRootR(energy << z); /* energy << z must be in range [0x10000000, 0x40000000] */
  161. scalei -= (15 - z/2 + 4); /* nInt = 1/sqrt(energy) in Q29 */
  162. /* normalize for final scaling */
  163. z = CLZ(invSqrtEnergy) - 1;
  164. invSqrtEnergy <<= z;
  165. scalei -= (z - 3 - 2); /* -2 for scalef, z-3 for invSqrtEnergy */
  166. scalef = MULSHIFT32(scalef, invSqrtEnergy); /* scalef (input) = Q30, invSqrtEnergy = Q29 * 2^z */
  167. gbMask = 0;
  168. if (scalei < 0) {
  169. scalei = -scalei;
  170. if (scalei > 31)
  171. scalei = 31;
  172. for (i = 0; i < nVals; i++) {
  173. c = MULSHIFT32(coef[i], scalef) >> scalei;
  174. gbMask |= FASTABS(c);
  175. coef[i] = c;
  176. }
  177. } else {
  178. /* for scalei <= 16, no clipping possible (coef[i] is < 2^15 before scaling)
  179. * for scalei > 16, just saturate exponent (rare)
  180. * scalef is close to full-scale (since we normalized invSqrtEnergy)
  181. * remember, we are just producing noise here
  182. */
  183. if (scalei > 16)
  184. scalei = 16;
  185. for (i = 0; i < nVals; i++) {
  186. c = MULSHIFT32(coef[i] << scalei, scalef);
  187. coef[i] = c;
  188. gbMask |= FASTABS(c);
  189. }
  190. }
  191. return gbMask;
  192. }
  193. /**************************************************************************************
  194. * Function: GenerateNoiseVector
  195. *
  196. * Description: create vector of noise coefficients for one scalefactor band
  197. *
  198. * Inputs: seed for number generator
  199. * number of coefficients to generate
  200. *
  201. * Outputs: buffer of nVals coefficients, range = [-2^15, 2^15)
  202. * updated seed for number generator
  203. *
  204. * Return: none
  205. **************************************************************************************/
  206. static void GenerateNoiseVector(int *coef, int *last, int nVals)
  207. {
  208. int i;
  209. for (i = 0; i < nVals; i++)
  210. coef[i] = ((signed int)Get32BitVal((unsigned int *)last)) >> 16;
  211. }
  212. /**************************************************************************************
  213. * Function: CopyNoiseVector
  214. *
  215. * Description: copy vector of noise coefficients for one scalefactor band from L to R
  216. *
  217. * Inputs: buffer of left coefficients
  218. * number of coefficients to copy
  219. *
  220. * Outputs: buffer of right coefficients
  221. *
  222. * Return: none
  223. **************************************************************************************/
  224. static void CopyNoiseVector(int *coefL, int *coefR, int nVals)
  225. {
  226. int i;
  227. for (i = 0; i < nVals; i++)
  228. coefR[i] = coefL[i];
  229. }
  230. /**************************************************************************************
  231. * Function: PNS
  232. *
  233. * Description: apply perceptual noise substitution, if enabled (MPEG-4 only)
  234. *
  235. * Inputs: valid AACDecInfo struct
  236. * index of current channel
  237. *
  238. * Outputs: shaped noise in scalefactor bands where PNS is active
  239. * updated minimum guard bit count for this channel
  240. *
  241. * Return: 0 if successful, -1 if error
  242. **************************************************************************************/
  243. int PNS(AACDecInfo *aacDecInfo, int ch)
  244. {
  245. int gp, sfb, win, width, nSamps, gb, gbMask;
  246. int *coef;
  247. const /*short*/ int *sfbTab;
  248. unsigned char *sfbCodeBook;
  249. short *scaleFactors;
  250. int msMaskOffset, checkCorr, genNew;
  251. unsigned char msMask;
  252. unsigned char *msMaskPtr;
  253. PSInfoBase *psi;
  254. ICSInfo *icsInfo;
  255. /* validate pointers */
  256. if (!aacDecInfo || !aacDecInfo->psInfoBase)
  257. return -1;
  258. psi = (PSInfoBase *)(aacDecInfo->psInfoBase);
  259. icsInfo = (ch == 1 && psi->commonWin == 1) ? &(psi->icsInfo[0]) : &(psi->icsInfo[ch]);
  260. if (!psi->pnsUsed[ch])
  261. return 0;
  262. if (icsInfo->winSequence == 2) {
  263. sfbTab = sfBandTabShort + sfBandTabShortOffset[psi->sampRateIdx];
  264. nSamps = NSAMPS_SHORT;
  265. } else {
  266. sfbTab = sfBandTabLong + sfBandTabLongOffset[psi->sampRateIdx];
  267. nSamps = NSAMPS_LONG;
  268. }
  269. coef = psi->coef[ch];
  270. sfbCodeBook = psi->sfbCodeBook[ch];
  271. scaleFactors = psi->scaleFactors[ch];
  272. checkCorr = (aacDecInfo->currBlockID == AAC_ID_CPE && psi->commonWin == 1 ? 1 : 0);
  273. gbMask = 0;
  274. for (gp = 0; gp < icsInfo->numWinGroup; gp++) {
  275. for (win = 0; win < icsInfo->winGroupLen[gp]; win++) {
  276. msMaskPtr = psi->msMaskBits + ((gp*icsInfo->maxSFB) >> 3);
  277. msMaskOffset = ((gp*icsInfo->maxSFB) & 0x07);
  278. msMask = (*msMaskPtr++) >> msMaskOffset;
  279. for (sfb = 0; sfb < icsInfo->maxSFB; sfb++) {
  280. width = sfbTab[sfb+1] - sfbTab[sfb];
  281. if (sfbCodeBook[sfb] == 13) {
  282. if (ch == 0) {
  283. /* generate new vector, copy into ch 1 if it's possible that the channels will be correlated
  284. * if ch 1 has PNS enabled for this SFB but it's uncorrelated (i.e. ms_used == 0),
  285. * the copied values will be overwritten when we process ch 1
  286. */
  287. GenerateNoiseVector(coef, &psi->pnsLastVal, width);
  288. if (checkCorr && psi->sfbCodeBook[1][gp*icsInfo->maxSFB + sfb] == 13)
  289. CopyNoiseVector(coef, psi->coef[1] + (coef - psi->coef[0]), width);
  290. } else {
  291. /* generate new vector if no correlation between channels */
  292. genNew = 1;
  293. if (checkCorr && psi->sfbCodeBook[0][gp*icsInfo->maxSFB + sfb] == 13) {
  294. if ( (psi->msMaskPresent == 1 && (msMask & 0x01)) || psi->msMaskPresent == 2 )
  295. genNew = 0;
  296. }
  297. if (genNew)
  298. GenerateNoiseVector(coef, &psi->pnsLastVal, width);
  299. }
  300. gbMask |= ScaleNoiseVector(coef, width, psi->scaleFactors[ch][gp*icsInfo->maxSFB + sfb]);
  301. }
  302. coef += width;
  303. /* get next mask bit (should be branchless on ARM) */
  304. msMask >>= 1;
  305. if (++msMaskOffset == 8) {
  306. msMask = *msMaskPtr++;
  307. msMaskOffset = 0;
  308. }
  309. }
  310. coef += (nSamps - sfbTab[icsInfo->maxSFB]);
  311. }
  312. sfbCodeBook += icsInfo->maxSFB;
  313. scaleFactors += icsInfo->maxSFB;
  314. }
  315. /* update guard bit count if necessary */
  316. gb = CLZ(gbMask) - 1;
  317. if (psi->gbCurrent[ch] > gb)
  318. psi->gbCurrent[ch] = gb;
  319. return 0;
  320. }