dqchan.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. /* ***** BEGIN LICENSE BLOCK *****
  2. * Version: RCSL 1.0/RPSL 1.0
  3. *
  4. * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
  5. *
  6. * The contents of this file, and the files included with this file, are
  7. * subject to the current version of the RealNetworks Public Source License
  8. * Version 1.0 (the "RPSL") available at
  9. * http://www.helixcommunity.org/content/rpsl unless you have licensed
  10. * the file under the RealNetworks Community Source License Version 1.0
  11. * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
  12. * in which case the RCSL will apply. You may also obtain the license terms
  13. * directly from RealNetworks. You may not use this file except in
  14. * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
  15. * applicable to this file, the RCSL. Please see the applicable RPSL or
  16. * RCSL for the rights, obligations and limitations governing use of the
  17. * contents of the file.
  18. *
  19. * This file is part of the Helix DNA Technology. RealNetworks is the
  20. * developer of the Original Code and owns the copyrights in the portions
  21. * it created.
  22. *
  23. * This file, and the files included with this file, is distributed and made
  24. * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  25. * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  26. * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
  27. * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  28. *
  29. * Technology Compatibility Kit Test Suite(s) Location:
  30. * http://www.helixcommunity.org/content/tck
  31. *
  32. * Contributor(s):
  33. *
  34. * ***** END LICENSE BLOCK ***** */
  35. /**************************************************************************************
  36. * Fixed-point MP3 decoder
  37. * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
  38. * August 2003
  39. *
  40. * dqchan.c - dequantization of transform coefficients
  41. **************************************************************************************/
  42. #include "coder.h"
  43. #include "assembly.h"
  /* Row of three ints. DequantChannel() overlays the sample buffer with an array
   * of these when it interleaves the three windows of a short block. */
  typedef int ARRAY3[3];

  /* Pre-emphasis offsets, one per long-block scale factor band index (0..21).
   * DequantChannel() adds the entry to the band's scale factor when preFlag is set. */
  static const char preTab[22] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    /* bands  0..10 */
      1, 1, 1, 1,                         /* bands 11..14 */
      2, 2,                               /* bands 15..16 */
      3, 3, 3,                            /* bands 17..19 */
      2, 0                                /* bands 20..21 */
  };
  47. /* pow(2,-i/4) for i=0..3, Q31 format */
  48. const int pow14[4] PROGMEM = {
  49. 0x7fffffff, 0x6ba27e65, 0x5a82799a, 0x4c1bf829
  50. };
  51. /* pow(2,-i/4) * pow(j,4/3) for i=0..3 j=0..15, Q25 format */
  52. const int pow43_14[4][16] PROGMEM = {
  53. { 0x00000000, 0x10000000, 0x285145f3, 0x453a5cdb, /* Q28 */
  54. 0x0cb2ff53, 0x111989d6, 0x15ce31c8, 0x1ac7f203,
  55. 0x20000000, 0x257106b9, 0x2b16b4a3, 0x30ed74b4,
  56. 0x36f23fa5, 0x3d227bd3, 0x437be656, 0x49fc823c, },
  57. { 0x00000000, 0x0d744fcd, 0x21e71f26, 0x3a36abd9,
  58. 0x0aadc084, 0x0e610e6e, 0x12560c1d, 0x168523cf,
  59. 0x1ae89f99, 0x1f7c03a4, 0x243bae49, 0x29249c67,
  60. 0x2e34420f, 0x33686f85, 0x38bf3dff, 0x3e370182, },
  61. { 0x00000000, 0x0b504f33, 0x1c823e07, 0x30f39a55,
  62. 0x08facd62, 0x0c176319, 0x0f6b3522, 0x12efe2ad,
  63. 0x16a09e66, 0x1a79a317, 0x1e77e301, 0x2298d5b4,
  64. 0x26da56fc, 0x2b3a902a, 0x2fb7e7e7, 0x3450f650, },
  65. { 0x00000000, 0x09837f05, 0x17f910d7, 0x2929c7a9,
  66. 0x078d0dfa, 0x0a2ae661, 0x0cf73154, 0x0fec91cb,
  67. 0x1306fe0a, 0x16434a6c, 0x199ee595, 0x1d17ae3d,
  68. 0x20abd76a, 0x2459d551, 0x28204fbb, 0x2bfe1808, },
  69. };
  70. /* pow(j,4/3) for j=16..63, Q23 format */
  71. const int pow43[] PROGMEM = {
  72. 0x1428a2fa, 0x15db1bd6, 0x1796302c, 0x19598d85,
  73. 0x1b24e8bb, 0x1cf7fcfa, 0x1ed28af2, 0x20b4582a,
  74. 0x229d2e6e, 0x248cdb55, 0x26832fda, 0x28800000,
  75. 0x2a832287, 0x2c8c70a8, 0x2e9bc5d8, 0x30b0ff99,
  76. 0x32cbfd4a, 0x34eca001, 0x3712ca62, 0x393e6088,
  77. 0x3b6f47e0, 0x3da56717, 0x3fe0a5fc, 0x4220ed72,
  78. 0x44662758, 0x46b03e7c, 0x48ff1e87, 0x4b52b3f3,
  79. 0x4daaebfd, 0x5007b497, 0x5268fc62, 0x54ceb29c,
  80. 0x5738c721, 0x59a72a59, 0x5c19cd35, 0x5e90a129,
  81. 0x610b9821, 0x638aa47f, 0x660db90f, 0x6894c90b,
  82. 0x6b1fc80c, 0x6daeaa0d, 0x70416360, 0x72d7e8b0,
  83. 0x75722ef9, 0x78102b85, 0x7ab1d3ec, 0x7d571e09,
  84. };
  85. /* sqrt(0.5) in Q31 format */
  86. #define SQRTHALF 0x5a82799a
  87. /*
  88. * Minimax polynomial approximation to pow(x, 4/3), over the range
  89. * poly43lo: x = [0.5, 0.7071]
  90. * poly43hi: x = [0.7071, 1.0]
  91. *
  92. * Relative error < 1E-7
  93. * Coefs are scaled by 4, 2, 1, 0.5, 0.25
  94. */
  95. static const unsigned int poly43lo[5] PROGMEM = { 0x29a0bda9, 0xb02e4828, 0x5957aa1b, 0x236c498d, 0xff581859 };
  96. static const unsigned int poly43hi[5] PROGMEM = { 0x10852163, 0xd333f6a4, 0x46e9408b, 0x27c2cef0, 0xfef577b4 };
  97. /* pow(2, i*4/3) as exp and frac */
  98. const int pow2exp[8] PROGMEM = { 14, 13, 11, 10, 9, 7, 6, 5 };
  99. const int pow2frac[8] PROGMEM = {
  100. 0x6597fa94, 0x50a28be6, 0x7fffffff, 0x6597fa94,
  101. 0x50a28be6, 0x7fffffff, 0x6597fa94, 0x50a28be6
  102. };
  103. /**************************************************************************************
  104. * Function: DequantBlock
  105. *
  106. * Description: Ken's highly-optimized, low memory dequantizer performing the operation
  107. * y = pow(x, 4.0/3.0) * pow(2, 25 - scale/4.0)
  108. *
  109. * Inputs: input buffer of decode Huffman codewords (signed-magnitude)
  110. * output buffer of same length (in-place (outbuf = inbuf) is allowed)
  111. * number of samples
  112. *
  113. * Outputs: dequantized samples in Q25 format
  114. *
  115. * Return: bitwise-OR of the unsigned outputs (for guard bit calculations)
  116. **************************************************************************************/
  117. /* __attribute__ ((section (".data"))) */ static int DequantBlock(int *inbuf, int *outbuf, int num, int scale)
  118. {
  119. int tab4[4];
  120. int scalef, scalei, shift;
  121. int sx, x, y;
  122. int mask = 0;
  123. const int *tab16;
  124. const unsigned int *coef;
  125. tab16 = pow43_14[scale & 0x3];
  126. scalef = pow14[scale & 0x3];
  127. scalei = MIN(scale >> 2, 31); /* smallest input scale = -47, so smallest scalei = -12 */
  128. /* cache first 4 values */
  129. shift = MIN(scalei + 3, 31);
  130. shift = MAX(shift, 0);
  131. tab4[0] = 0;
  132. tab4[1] = tab16[1] >> shift;
  133. tab4[2] = tab16[2] >> shift;
  134. tab4[3] = tab16[3] >> shift;
  135. do {
  136. sx = *inbuf++;
  137. x = sx & 0x7fffffff; /* sx = sign|mag */
  138. if (x < 4) {
  139. y = tab4[x];
  140. } else if (x < 16) {
  141. y = tab16[x];
  142. y = (scalei < 0) ? y << -scalei : y >> scalei;
  143. } else {
  144. if (x < 64) {
  145. y = pow43[x-16];
  146. /* fractional scale */
  147. y = MULSHIFT32(y, scalef);
  148. shift = scalei - 3;
  149. } else {
  150. /* normalize to [0x40000000, 0x7fffffff] */
  151. x <<= 17;
  152. shift = 0;
  153. if (x < 0x08000000)
  154. x <<= 4, shift += 4;
  155. if (x < 0x20000000)
  156. x <<= 2, shift += 2;
  157. if (x < 0x40000000)
  158. x <<= 1, shift += 1;
  159. coef = (x < SQRTHALF) ? poly43lo : poly43hi;
  160. /* polynomial */
  161. y = coef[0];
  162. y = MULSHIFT32(y, x) + coef[1];
  163. y = MULSHIFT32(y, x) + coef[2];
  164. y = MULSHIFT32(y, x) + coef[3];
  165. y = MULSHIFT32(y, x) + coef[4];
  166. y = MULSHIFT32(y, pow2frac[shift]) << 3;
  167. /* fractional scale */
  168. y = MULSHIFT32(y, scalef);
  169. shift = scalei - pow2exp[shift];
  170. }
  171. /* integer scale */
  172. if (shift < 0) {
  173. shift = -shift;
  174. if (y > (0x7fffffff >> shift))
  175. y = 0x7fffffff; /* clip */
  176. else
  177. y <<= shift;
  178. } else {
  179. y >>= shift;
  180. }
  181. }
  182. /* sign and store */
  183. mask |= y;
  184. *outbuf++ = (sx < 0) ? -y : y;
  185. } while (--num);
  186. return mask;
  187. }
  188. /**************************************************************************************
  189. * Function: DequantChannel
  190. *
  191. * Description: dequantize one granule, one channel worth of decoded Huffman codewords
  192. *
  193. * Inputs: sample buffer (decoded Huffman codewords), length = MAX_NSAMP samples
  194. * work buffer for reordering short-block, length = MAX_REORDER_SAMPS
  195. * samples (3 * width of largest short-block critical band)
  196. * non-zero bound for this channel/granule
  197. * valid FrameHeader, SideInfoSub, ScaleFactorInfoSub, and CriticalBandInfo
  198. * structures for this channel/granule
  199. *
  200. * Outputs: MAX_NSAMP dequantized samples in sampleBuf
  201. * updated non-zero bound (indicating which samples are != 0 after DQ)
  202. * filled-in cbi structure indicating start and end critical bands
  203. *
  204. * Return: minimum number of guard bits in dequantized sampleBuf
  205. *
  206. * Notes: dequantized samples in Q(DQ_FRACBITS_OUT) format
  207. **************************************************************************************/
  208. /* __attribute__ ((section (".data"))) */ int DequantChannel(int *sampleBuf, int *workBuf, int *nonZeroBound, FrameHeader *fh, SideInfoSub *sis,
  209. ScaleFactorInfoSub *sfis, CriticalBandInfo *cbi)
  210. {
  211. int i, j, w, cb;
  212. int /* cbStartL, */ cbEndL, cbStartS, cbEndS;
  213. int nSamps, nonZero, sfactMultiplier, gbMask;
  214. int globalGain, gainI;
  215. int cbMax[3];
  216. ARRAY3 *buf; /* short block reorder */
  217. /* set default start/end points for short/long blocks - will update with non-zero cb info */
  218. if (sis->blockType == 2) {
  219. // cbStartL = 0;
  220. if (sis->mixedBlock) {
  221. cbEndL = (fh->ver == MPEG1 ? 8 : 6);
  222. cbStartS = 3;
  223. } else {
  224. cbEndL = 0;
  225. cbStartS = 0;
  226. }
  227. cbEndS = 13;
  228. } else {
  229. /* long block */
  230. //cbStartL = 0;
  231. cbEndL = 22;
  232. cbStartS = 13;
  233. cbEndS = 13;
  234. }
  235. cbMax[2] = cbMax[1] = cbMax[0] = 0;
  236. gbMask = 0;
  237. i = 0;
  238. /* sfactScale = 0 --> quantizer step size = 2
  239. * sfactScale = 1 --> quantizer step size = sqrt(2)
  240. * so sfactMultiplier = 2 or 4 (jump through globalGain by powers of 2 or sqrt(2))
  241. */
  242. sfactMultiplier = 2 * (sis->sfactScale + 1);
  243. /* offset globalGain by -2 if midSide enabled, for 1/sqrt(2) used in MidSideProc()
  244. * (DequantBlock() does 0.25 * gainI so knocking it down by two is the same as
  245. * dividing every sample by sqrt(2) = multiplying by 2^-.5)
  246. */
  247. globalGain = sis->globalGain;
  248. if (fh->modeExt >> 1)
  249. globalGain -= 2;
  250. globalGain += IMDCT_SCALE; /* scale everything by sqrt(2), for fast IMDCT36 */
  251. /* long blocks */
  252. for (cb = 0; cb < cbEndL; cb++) {
  253. nonZero = 0;
  254. nSamps = fh->sfBand->l[cb + 1] - fh->sfBand->l[cb];
  255. gainI = 210 - globalGain + sfactMultiplier * (sfis->l[cb] + (sis->preFlag ? (int)preTab[cb] : 0));
  256. nonZero |= DequantBlock(sampleBuf + i, sampleBuf + i, nSamps, gainI);
  257. i += nSamps;
  258. /* update highest non-zero critical band */
  259. if (nonZero)
  260. cbMax[0] = cb;
  261. gbMask |= nonZero;
  262. if (i >= *nonZeroBound)
  263. break;
  264. }
  265. /* set cbi (Type, EndS[], EndSMax will be overwritten if we proceed to do short blocks) */
  266. cbi->cbType = 0; /* long only */
  267. cbi->cbEndL = cbMax[0];
  268. cbi->cbEndS[0] = cbi->cbEndS[1] = cbi->cbEndS[2] = 0;
  269. cbi->cbEndSMax = 0;
  270. /* early exit if no short blocks */
  271. if (cbStartS >= 12)
  272. return CLZ(gbMask) - 1;
  273. /* short blocks */
  274. cbMax[2] = cbMax[1] = cbMax[0] = cbStartS;
  275. for (cb = cbStartS; cb < cbEndS; cb++) {
  276. nSamps = fh->sfBand->s[cb + 1] - fh->sfBand->s[cb];
  277. for (w = 0; w < 3; w++) {
  278. nonZero = 0;
  279. gainI = 210 - globalGain + 8*sis->subBlockGain[w] + sfactMultiplier*(sfis->s[cb][w]);
  280. nonZero |= DequantBlock(sampleBuf + i + nSamps*w, workBuf + nSamps*w, nSamps, gainI);
  281. /* update highest non-zero critical band */
  282. if (nonZero)
  283. cbMax[w] = cb;
  284. gbMask |= nonZero;
  285. }
  286. /* reorder blocks */
  287. buf = (ARRAY3 *)(sampleBuf + i);
  288. i += 3*nSamps;
  289. for (j = 0; j < nSamps; j++) {
  290. buf[j][0] = workBuf[0*nSamps + j];
  291. buf[j][1] = workBuf[1*nSamps + j];
  292. buf[j][2] = workBuf[2*nSamps + j];
  293. }
  294. ASSERT(3*nSamps <= MAX_REORDER_SAMPS);
  295. if (i >= *nonZeroBound)
  296. break;
  297. }
  298. /* i = last non-zero INPUT sample processed, which corresponds to highest possible non-zero
  299. * OUTPUT sample (after reorder)
  300. * however, the original nzb is no longer necessarily true
  301. * for each cb, buf[][] is updated with 3*nSamps samples (i increases 3*nSamps each time)
  302. * (buf[j + 1][0] = 3 (input) samples ahead of buf[j][0])
  303. * so update nonZeroBound to i
  304. */
  305. *nonZeroBound = i;
  306. ASSERT(*nonZeroBound <= MAX_NSAMP);
  307. cbi->cbType = (sis->mixedBlock ? 2 : 1); /* 2 = mixed short/long, 1 = short only */
  308. cbi->cbEndS[0] = cbMax[0];
  309. cbi->cbEndS[1] = cbMax[1];
  310. cbi->cbEndS[2] = cbMax[2];
  311. cbi->cbEndSMax = cbMax[0];
  312. cbi->cbEndSMax = MAX(cbi->cbEndSMax, cbMax[1]);
  313. cbi->cbEndSMax = MAX(cbi->cbEndSMax, cbMax[2]);
  314. return CLZ(gbMask) - 1;
  315. }