2
0

polyphase.c 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. /* ***** BEGIN LICENSE BLOCK *****
  2. * Version: RCSL 1.0/RPSL 1.0
  3. *
  4. * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
  5. *
  6. * The contents of this file, and the files included with this file, are
  7. * subject to the current version of the RealNetworks Public Source License
  8. * Version 1.0 (the "RPSL") available at
  9. * http://www.helixcommunity.org/content/rpsl unless you have licensed
  10. * the file under the RealNetworks Community Source License Version 1.0
  11. * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
  12. * in which case the RCSL will apply. You may also obtain the license terms
  13. * directly from RealNetworks. You may not use this file except in
  14. * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
  15. * applicable to this file, the RCSL. Please see the applicable RPSL or
  16. * RCSL for the rights, obligations and limitations governing use of the
  17. * contents of the file.
  18. *
  19. * This file is part of the Helix DNA Technology. RealNetworks is the
  20. * developer of the Original Code and owns the copyrights in the portions
  21. * it created.
  22. *
  23. * This file, and the files included with this file, is distributed and made
  24. * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  25. * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  26. * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
  27. * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  28. *
  29. * Technology Compatibility Kit Test Suite(s) Location:
  30. * http://www.helixcommunity.org/content/tck
  31. *
  32. * Contributor(s):
  33. *
  34. * ***** END LICENSE BLOCK ***** */
  35. /**************************************************************************************
  36. * Fixed-point MP3 decoder
  37. * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
  38. * June 2003
  39. *
  40. * polyphase.c - final stage of subband transform (polyphase synthesis filter)
  41. *
  42. * This is the C reference version using __int64
  43. * Look in the appropriate subdirectories for optimized asm implementations
  44. * (e.g. arm/asmpoly.s)
  45. **************************************************************************************/
  46. #include "coder.h"
  47. #include "assembly.h"
  48. /* input to Polyphase = Q(DQ_FRACBITS_OUT-2), gain 2 bits in convolution
  49. * we also have the implicit bias of 2^15 to add back, so net fraction bits =
  50. * DQ_FRACBITS_OUT - 2 - 2 - 15
  51. * (see comment on Dequantize() for more info)
  52. */
  53. #define DEF_NFRACBITS (DQ_FRACBITS_OUT - 2 - 2 - 15)
  54. #define CSHIFT 12 /* coefficients have 12 leading sign bits for early-terminating multiplies */
  /*
   * ClipToShort - drop fraction bits from a fixed-point value and saturate
   *               the result to the 16-bit signed range.
   *
   * x        - value to convert; the caller is expected to have added the
   *            rounding term (1 << (fracBits-1)) beforehand
   * fracBits - number of low-order bits to discard with a right shift
   *
   * Returns (x >> fracBits) limited to [-32768, 32767].
   * Relies on >> of a negative int behaving as an arithmetic shift.
   */
  static __inline short ClipToShort(int x, int fracBits)
  {
      int scaled = x >> fracBits;

      /* saturate instead of wrapping when the value does not fit in a short */
      if (scaled > 32767) {
          return (short)32767;
      }
      if (scaled < -32768) {
          return (short)(-32768);
      }
      return (short)scaled;
  }
  /*
   * Multiply-accumulate steps used by PolyphaseMono.
   *
   * Each macro expands to a brace-enclosed block (invoke it without a trailing
   * semicolon) and works on variables of the invoking function:
   *   coef         - coefficient read pointer, advanced past the values consumed
   *   vb1          - base of the current window of input samples
   *   c1, c2       - scratch for the coefficients just read
   *   vLo, vHi     - scratch for the samples at vb1[x] and vb1[23-x]
   *   sum1L, sum2L - 64-bit accumulators updated through MADD64
   *
   * MC0M: two coefficients, both samples, accumulates into sum1L only.
   */
  #define MC0M(x) { \
      c1 = coef[0]; c2 = coef[1]; coef += 2; \
      vLo = vb1[(x)]; vHi = vb1[23 - (x)]; \
      sum1L = MADD64(sum1L, vLo, c1); \
      sum1L = MADD64(sum1L, vHi, -c2); \
  }
  /* MC1M: one coefficient, the low sample only, accumulates into sum1L. */
  #define MC1M(x) { \
      c1 = coef[0]; coef += 1; \
      vLo = vb1[(x)]; \
      sum1L = MADD64(sum1L, vLo, c1); \
  }
  /*
   * MC2M: two coefficients, both samples; sum1L gets vLo*c1 - vHi*c2 and
   * sum2L gets vLo*c2 + vHi*c1.
   */
  #define MC2M(x) { \
      c1 = coef[0]; c2 = coef[1]; coef += 2; \
      vLo = vb1[(x)]; vHi = vb1[23 - (x)]; \
      sum1L = MADD64(sum1L, vLo, c1); \
      sum1L = MADD64(sum1L, vHi, -c2); \
      sum2L = MADD64(sum2L, vLo, c2); \
      sum2L = MADD64(sum2L, vHi, c1); \
  }
  82. /**************************************************************************************
  83. * Function: PolyphaseMono
  84. *
  85. * Description: filter one subband and produce 32 output PCM samples for one channel
  86. *
  87. * Inputs: pointer to PCM output buffer
  88. * number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
  89. * pointer to start of vbuf (preserved from last call)
  90. * start of filter coefficient table (in proper, shuffled order)
  91. * no minimum number of guard bits is required for input vbuf
  92. * (see additional scaling comments below)
  93. *
  94. * Outputs: 32 samples of one channel of decoded PCM data, (i.e. Q16.0)
  95. *
  96. * Return: none
  97. *
  98. * TODO: add 32-bit version for platforms where 64-bit mul-acc is not supported
  99. * (note max filter gain - see polyCoef[] comments)
  100. **************************************************************************************/
  101. void PolyphaseMono(short *pcm, int *vbuf, const int *coefBase)
  102. {
  103. int i;
  104. const int *coef;
  105. int *vb1;
  106. int vLo, vHi, c1, c2;
  107. Word64 sum1L, sum2L, rndVal;
  108. rndVal = (Word64)( 1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) );
  109. /* special case, output sample 0 */
  110. coef = coefBase;
  111. vb1 = vbuf;
  112. sum1L = rndVal;
  113. MC0M(0)
  114. MC0M(1)
  115. MC0M(2)
  116. MC0M(3)
  117. MC0M(4)
  118. MC0M(5)
  119. MC0M(6)
  120. MC0M(7)
  121. *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
  122. /* special case, output sample 16 */
  123. coef = coefBase + 256;
  124. vb1 = vbuf + 64*16;
  125. sum1L = rndVal;
  126. MC1M(0)
  127. MC1M(1)
  128. MC1M(2)
  129. MC1M(3)
  130. MC1M(4)
  131. MC1M(5)
  132. MC1M(6)
  133. MC1M(7)
  134. *(pcm + 16) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
  135. /* main convolution loop: sum1L = samples 1, 2, 3, ... 15 sum2L = samples 31, 30, ... 17 */
  136. coef = coefBase + 16;
  137. vb1 = vbuf + 64;
  138. pcm++;
  139. /* right now, the compiler creates bad asm from this... */
  140. for (i = 15; i > 0; i--) {
  141. sum1L = sum2L = rndVal;
  142. MC2M(0)
  143. MC2M(1)
  144. MC2M(2)
  145. MC2M(3)
  146. MC2M(4)
  147. MC2M(5)
  148. MC2M(6)
  149. MC2M(7)
  150. vb1 += 64;
  151. *(pcm) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
  152. *(pcm + 2*i) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS);
  153. pcm++;
  154. }
  155. }
  /*
   * Multiply-accumulate steps used by PolyphaseStereo.
   *
   * Each macro expands to a brace-enclosed block (invoke it without a trailing
   * semicolon) and works on variables of the invoking function:
   *   coef         - coefficient read pointer, advanced past the values consumed
   *   vb1          - base of the current window; the first channel is read at
   *                  vb1[x] / vb1[23-x], the second at vb1[32+x] / vb1[32+23-x]
   *   c1, c2       - scratch for the coefficients just read (shared by both channels)
   *   vLo, vHi     - scratch for the two samples of the channel being processed
   *   sum1L, sum2L - 64-bit accumulators for the first channel
   *   sum1R, sum2R - 64-bit accumulators for the second channel
   *
   * MC0S: two coefficients, both samples, accumulates into sum1L / sum1R only.
   */
  #define MC0S(x) { \
      c1 = coef[0]; c2 = coef[1]; coef += 2; \
      vLo = vb1[(x)]; vHi = vb1[23 - (x)]; \
      sum1L = MADD64(sum1L, vLo, c1); \
      sum1L = MADD64(sum1L, vHi, -c2); \
      vLo = vb1[32 + (x)]; vHi = vb1[32 + (23 - (x))]; \
      sum1R = MADD64(sum1R, vLo, c1); \
      sum1R = MADD64(sum1R, vHi, -c2); \
  }
  /* MC1S: one coefficient, the low sample of each channel, into sum1L / sum1R. */
  #define MC1S(x) { \
      c1 = coef[0]; coef += 1; \
      vLo = vb1[(x)]; \
      sum1L = MADD64(sum1L, vLo, c1); \
      vLo = vb1[32 + (x)]; \
      sum1R = MADD64(sum1R, vLo, c1); \
  }
  /*
   * MC2S: two coefficients, both samples of each channel; the "1" accumulators
   * get vLo*c1 - vHi*c2 and the "2" accumulators get vLo*c2 + vHi*c1.
   */
  #define MC2S(x) { \
      c1 = coef[0]; c2 = coef[1]; coef += 2; \
      vLo = vb1[(x)]; vHi = vb1[23 - (x)]; \
      sum1L = MADD64(sum1L, vLo, c1); \
      sum1L = MADD64(sum1L, vHi, -c2); \
      sum2L = MADD64(sum2L, vLo, c2); \
      sum2L = MADD64(sum2L, vHi, c1); \
      vLo = vb1[32 + (x)]; vHi = vb1[32 + (23 - (x))]; \
      sum1R = MADD64(sum1R, vLo, c1); \
      sum1R = MADD64(sum1R, vHi, -c2); \
      sum2R = MADD64(sum2R, vLo, c2); \
      sum2R = MADD64(sum2R, vHi, c1); \
  }
  179. /**************************************************************************************
  180. * Function: PolyphaseStereo
  181. *
  182. * Description: filter one subband and produce 32 output PCM samples for each channel
  183. *
  184. * Inputs: pointer to PCM output buffer
  185. * number of "extra shifts" (vbuf format = Q(DQ_FRACBITS_OUT-2))
  186. * pointer to start of vbuf (preserved from last call)
  187. * start of filter coefficient table (in proper, shuffled order)
  188. * no minimum number of guard bits is required for input vbuf
  189. * (see additional scaling comments below)
  190. *
  191. * Outputs: 32 samples of two channels of decoded PCM data, (i.e. Q16.0)
  192. *
  193. * Return: none
  194. *
  195. * Notes: interleaves PCM samples LRLRLR...
  196. *
  197. * TODO: add 32-bit version for platforms where 64-bit mul-acc is not supported
  198. **************************************************************************************/
  199. void PolyphaseStereo(short *pcm, int *vbuf, const int *coefBase)
  200. {
  201. int i;
  202. const int *coef;
  203. int *vb1;
  204. int vLo, vHi, c1, c2;
  205. Word64 sum1L, sum2L, sum1R, sum2R, rndVal;
  206. rndVal = (Word64)( 1 << (DEF_NFRACBITS - 1 + (32 - CSHIFT)) );
  207. /* special case, output sample 0 */
  208. coef = coefBase;
  209. vb1 = vbuf;
  210. sum1L = sum1R = rndVal;
  211. MC0S(0)
  212. MC0S(1)
  213. MC0S(2)
  214. MC0S(3)
  215. MC0S(4)
  216. MC0S(5)
  217. MC0S(6)
  218. MC0S(7)
  219. *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
  220. *(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
  221. /* special case, output sample 16 */
  222. coef = coefBase + 256;
  223. vb1 = vbuf + 64*16;
  224. sum1L = sum1R = rndVal;
  225. MC1S(0)
  226. MC1S(1)
  227. MC1S(2)
  228. MC1S(3)
  229. MC1S(4)
  230. MC1S(5)
  231. MC1S(6)
  232. MC1S(7)
  233. *(pcm + 2*16 + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
  234. *(pcm + 2*16 + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
  235. /* main convolution loop: sum1L = samples 1, 2, 3, ... 15 sum2L = samples 31, 30, ... 17 */
  236. coef = coefBase + 16;
  237. vb1 = vbuf + 64;
  238. pcm += 2;
  239. /* right now, the compiler creates bad asm from this... */
  240. for (i = 15; i > 0; i--) {
  241. sum1L = sum2L = rndVal;
  242. sum1R = sum2R = rndVal;
  243. MC2S(0)
  244. MC2S(1)
  245. MC2S(2)
  246. MC2S(3)
  247. MC2S(4)
  248. MC2S(5)
  249. MC2S(6)
  250. MC2S(7)
  251. vb1 += 64;
  252. *(pcm + 0) = ClipToShort((int)SAR64(sum1L, (32-CSHIFT)), DEF_NFRACBITS);
  253. *(pcm + 1) = ClipToShort((int)SAR64(sum1R, (32-CSHIFT)), DEF_NFRACBITS);
  254. *(pcm + 2*2*i + 0) = ClipToShort((int)SAR64(sum2L, (32-CSHIFT)), DEF_NFRACBITS);
  255. *(pcm + 2*2*i + 1) = ClipToShort((int)SAR64(sum2R, (32-CSHIFT)), DEF_NFRACBITS);
  256. pcm += 2;
  257. }
  258. }