sbrimdct.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  /* ***** BEGIN LICENSE BLOCK *****
   * Source last modified: $Id: sbrimdct.c,v 1.1 2005/02/26 01:47:35 jrecker Exp $
   *
   * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
   *
   * The contents of this file, and the files included with this file,
   * are subject to the current version of the RealNetworks Public
   * Source License (the "RPSL") available at
   * http://www.helixcommunity.org/content/rpsl unless you have licensed
   * the file under the current version of the RealNetworks Community
   * Source License (the "RCSL") available at
   * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
   * will apply. You may also obtain the license terms directly from
   * RealNetworks. You may not use this file except in compliance with
   * the RPSL or, if you have a valid RCSL with RealNetworks applicable
   * to this file, the RCSL. Please see the applicable RPSL or RCSL for
   * the rights, obligations and limitations governing use of the
   * contents of the file.
   *
   * This file is part of the Helix DNA Technology. RealNetworks is the
   * developer of the Original Code and owns the copyrights in the
   * portions it created.
   *
   * This file, and the files included with this file, is distributed
   * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
   * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
   * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
   * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
   * ENJOYMENT OR NON-INFRINGEMENT.
   *
   * Technology Compatibility Kit Test Suite(s) Location:
   * http://www.helixcommunity.org/content/tck
   *
   * Contributor(s):
   *
   * ***** END LICENSE BLOCK ***** */
  /**************************************************************************************
   * Fixed-point HE-AAC decoder
   * Jon Recker (jrecker@real.com)
   * February 2005
   *
   * sbrimdct.c - inverse MDCT without clipping or interleaving, for input to SBR
   **************************************************************************************/
  44. #include "coder.h"
  45. #include "assembly.h"
  46. /**************************************************************************************
  47. * Function: DecWindowOverlapNoClip
  48. *
  49. * Description: apply synthesis window, do overlap-add without clipping,
  50. * for winSequence LONG-LONG
  51. *
  52. * Inputs: input buffer (output of type-IV DCT)
  53. * overlap buffer (saved from last time)
  54. * window type (sin or KBD) for input buffer
  55. * window type (sin or KBD) for overlap buffer
  56. *
  57. * Outputs: one channel, one frame of 32-bit PCM, non-interleaved
  58. *
  59. * Return: none
  60. *
  61. * Notes: use this function when the decoded PCM is going to the SBR decoder
  62. **************************************************************************************/
  63. void DecWindowOverlapNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev)
  64. {
  65. int in, w0, w1, f0, f1;
  66. int *buf1, *over1, *out1;
  67. const int *wndPrev, *wndCurr;
  68. buf0 += (1024 >> 1);
  69. buf1 = buf0 - 1;
  70. out1 = out0 + 1024 - 1;
  71. over1 = over0 + 1024 - 1;
  72. wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
  73. if (winTypeCurr == winTypePrev) {
  74. /* cut window loads in half since current and overlap sections use same symmetric window */
  75. do {
  76. w0 = *wndPrev++;
  77. w1 = *wndPrev++;
  78. in = *buf0++;
  79. f0 = MULSHIFT32(w0, in);
  80. f1 = MULSHIFT32(w1, in);
  81. in = *over0;
  82. *out0++ = in - f0;
  83. in = *over1;
  84. *out1-- = in + f1;
  85. in = *buf1--;
  86. *over1-- = MULSHIFT32(w0, in);
  87. *over0++ = MULSHIFT32(w1, in);
  88. } while (over0 < over1);
  89. } else {
  90. /* different windows for current and overlap parts - should still fit in registers on ARM w/o stack spill */
  91. wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
  92. do {
  93. w0 = *wndPrev++;
  94. w1 = *wndPrev++;
  95. in = *buf0++;
  96. f0 = MULSHIFT32(w0, in);
  97. f1 = MULSHIFT32(w1, in);
  98. in = *over0;
  99. *out0++ = in - f0;
  100. in = *over1;
  101. *out1-- = in + f1;
  102. w0 = *wndCurr++;
  103. w1 = *wndCurr++;
  104. in = *buf1--;
  105. *over1-- = MULSHIFT32(w0, in);
  106. *over0++ = MULSHIFT32(w1, in);
  107. } while (over0 < over1);
  108. }
  109. }
  110. /**************************************************************************************
  111. * Function: DecWindowOverlapLongStart
  112. *
  113. * Description: apply synthesis window, do overlap-add, without clipping
  114. * for winSequence LONG-START
  115. *
  116. * Inputs: input buffer (output of type-IV DCT)
  117. * overlap buffer (saved from last time)
  118. * window type (sin or KBD) for input buffer
  119. * window type (sin or KBD) for overlap buffer
  120. *
  121. * Outputs: one channel, one frame of 32-bit PCM, non-interleaved
  122. *
  123. * Return: none
  124. *
  125. * Notes: use this function when the decoded PCM is going to the SBR decoder
  126. **************************************************************************************/
  127. void DecWindowOverlapLongStartNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev)
  128. {
  129. int i, in, w0, w1, f0, f1;
  130. int *buf1, *over1, *out1;
  131. const int *wndPrev, *wndCurr;
  132. buf0 += (1024 >> 1);
  133. buf1 = buf0 - 1;
  134. out1 = out0 + 1024 - 1;
  135. over1 = over0 + 1024 - 1;
  136. wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
  137. i = 448; /* 2 outputs, 2 overlaps per loop */
  138. do {
  139. w0 = *wndPrev++;
  140. w1 = *wndPrev++;
  141. in = *buf0++;
  142. f0 = MULSHIFT32(w0, in);
  143. f1 = MULSHIFT32(w1, in);
  144. in = *over0;
  145. *out0++ = in - f0;
  146. in = *over1;
  147. *out1-- = in + f1;
  148. in = *buf1--;
  149. *over1-- = 0; /* Wn = 0 for n = (2047, 2046, ... 1600) */
  150. *over0++ = in >> 1; /* Wn = 1 for n = (1024, 1025, ... 1471) */
  151. } while (--i);
  152. wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
  153. /* do 64 more loops - 2 outputs, 2 overlaps per loop */
  154. do {
  155. w0 = *wndPrev++;
  156. w1 = *wndPrev++;
  157. in = *buf0++;
  158. f0 = MULSHIFT32(w0, in);
  159. f1 = MULSHIFT32(w1, in);
  160. in = *over0;
  161. *out0++ = in - f0;
  162. in = *over1;
  163. *out1-- = in + f1;
  164. w0 = *wndCurr++; /* W[0], W[1], ... --> W[255], W[254], ... */
  165. w1 = *wndCurr++; /* W[127], W[126], ... --> W[128], W[129], ... */
  166. in = *buf1--;
  167. *over1-- = MULSHIFT32(w0, in); /* Wn = short window for n = (1599, 1598, ... , 1536) */
  168. *over0++ = MULSHIFT32(w1, in); /* Wn = short window for n = (1472, 1473, ... , 1535) */
  169. } while (over0 < over1);
  170. }
  171. /**************************************************************************************
  172. * Function: DecWindowOverlapLongStop
  173. *
  174. * Description: apply synthesis window, do overlap-add, without clipping
  175. * for winSequence LONG-STOP
  176. *
  177. * Inputs: input buffer (output of type-IV DCT)
  178. * overlap buffer (saved from last time)
  179. * window type (sin or KBD) for input buffer
  180. * window type (sin or KBD) for overlap buffer
  181. *
  182. * Outputs: one channel, one frame of 32-bit PCM, non-interleaved
  183. *
  184. * Return: none
  185. *
  186. * Notes: use this function when the decoded PCM is going to the SBR decoder
  187. **************************************************************************************/
  188. void DecWindowOverlapLongStopNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev)
  189. {
  190. int i, in, w0, w1, f0, f1;
  191. int *buf1, *over1, *out1;
  192. const int *wndPrev, *wndCurr;
  193. buf0 += (1024 >> 1);
  194. buf1 = buf0 - 1;
  195. out1 = out0 + 1024 - 1;
  196. over1 = over0 + 1024 - 1;
  197. wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
  198. wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[1] : sinWindow + sinWindowOffset[1]);
  199. i = 448; /* 2 outputs, 2 overlaps per loop */
  200. do {
  201. /* Wn = 0 for n = (0, 1, ... 447) */
  202. /* Wn = 1 for n = (576, 577, ... 1023) */
  203. in = *buf0++;
  204. f1 = in >> 1; /* scale since skipping multiply by Q31 */
  205. in = *over0;
  206. *out0++ = in;
  207. in = *over1;
  208. *out1-- = in + f1;
  209. w0 = *wndCurr++;
  210. w1 = *wndCurr++;
  211. in = *buf1--;
  212. *over1-- = MULSHIFT32(w0, in);
  213. *over0++ = MULSHIFT32(w1, in);
  214. } while (--i);
  215. /* do 64 more loops - 2 outputs, 2 overlaps per loop */
  216. do {
  217. w0 = *wndPrev++; /* W[0], W[1], ...W[63] */
  218. w1 = *wndPrev++; /* W[127], W[126], ... W[64] */
  219. in = *buf0++;
  220. f0 = MULSHIFT32(w0, in);
  221. f1 = MULSHIFT32(w1, in);
  222. in = *over0;
  223. *out0++ = in - f0;
  224. in = *over1;
  225. *out1-- = in + f1;
  226. w0 = *wndCurr++;
  227. w1 = *wndCurr++;
  228. in = *buf1--;
  229. *over1-- = MULSHIFT32(w0, in);
  230. *over0++ = MULSHIFT32(w1, in);
  231. } while (over0 < over1);
  232. }
  233. /**************************************************************************************
  234. * Function: DecWindowOverlapShort
  235. *
  236. * Description: apply synthesis window, do overlap-add, without clipping
  237. * for winSequence EIGHT-SHORT (does all 8 short blocks)
  238. *
  239. * Inputs: input buffer (output of type-IV DCT)
  240. * overlap buffer (saved from last time)
  241. * window type (sin or KBD) for input buffer
  242. * window type (sin or KBD) for overlap buffer
  243. *
  244. * Outputs: one channel, one frame of 32-bit PCM, non-interleaved
  245. *
  246. * Return: none
  247. *
  248. * Notes: use this function when the decoded PCM is going to the SBR decoder
  249. **************************************************************************************/
  250. void DecWindowOverlapShortNoClip(int *buf0, int *over0, int *out0, int winTypeCurr, int winTypePrev)
  251. {
  252. int i, in, w0, w1, f0, f1;
  253. int *buf1, *over1, *out1;
  254. const int *wndPrev, *wndCurr;
  255. wndPrev = (winTypePrev == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
  256. wndCurr = (winTypeCurr == 1 ? kbdWindow + kbdWindowOffset[0] : sinWindow + sinWindowOffset[0]);
  257. /* pcm[0-447] = 0 + overlap[0-447] */
  258. i = 448;
  259. do {
  260. f0 = *over0++;
  261. f1 = *over0++;
  262. *out0++ = f0;
  263. *out0++ = f1;
  264. i -= 2;
  265. } while (i);
  266. /* pcm[448-575] = Wp[0-127] * block0[0-127] + overlap[448-575] */
  267. out1 = out0 + (128 - 1);
  268. over1 = over0 + 128 - 1;
  269. buf0 += 64;
  270. buf1 = buf0 - 1;
  271. do {
  272. w0 = *wndPrev++; /* W[0], W[1], ...W[63] */
  273. w1 = *wndPrev++; /* W[127], W[126], ... W[64] */
  274. in = *buf0++;
  275. f0 = MULSHIFT32(w0, in);
  276. f1 = MULSHIFT32(w1, in);
  277. in = *over0;
  278. *out0++ = in - f0;
  279. in = *over1;
  280. *out1-- = in + f1;
  281. w0 = *wndCurr++;
  282. w1 = *wndCurr++;
  283. in = *buf1--;
  284. /* save over0/over1 for next short block, in the slots just vacated */
  285. *over1-- = MULSHIFT32(w0, in);
  286. *over0++ = MULSHIFT32(w1, in);
  287. } while (over0 < over1);
  288. /* pcm[576-703] = Wc[128-255] * block0[128-255] + Wc[0-127] * block1[0-127] + overlap[576-703]
  289. * pcm[704-831] = Wc[128-255] * block1[128-255] + Wc[0-127] * block2[0-127] + overlap[704-831]
  290. * pcm[832-959] = Wc[128-255] * block2[128-255] + Wc[0-127] * block3[0-127] + overlap[832-959]
  291. */
  292. for (i = 0; i < 3; i++) {
  293. out0 += 64;
  294. out1 = out0 + 128 - 1;
  295. over0 += 64;
  296. over1 = over0 + 128 - 1;
  297. buf0 += 64;
  298. buf1 = buf0 - 1;
  299. wndCurr -= 128;
  300. do {
  301. w0 = *wndCurr++; /* W[0], W[1], ...W[63] */
  302. w1 = *wndCurr++; /* W[127], W[126], ... W[64] */
  303. in = *buf0++;
  304. f0 = MULSHIFT32(w0, in);
  305. f1 = MULSHIFT32(w1, in);
  306. in = *(over0 - 128); /* from last short block */
  307. in += *(over0 + 0); /* from last full frame */
  308. *out0++ = in - f0;
  309. in = *(over1 - 128); /* from last short block */
  310. in += *(over1 + 0); /* from last full frame */
  311. *out1-- = in + f1;
  312. /* save over0/over1 for next short block, in the slots just vacated */
  313. in = *buf1--;
  314. *over1-- = MULSHIFT32(w0, in);
  315. *over0++ = MULSHIFT32(w1, in);
  316. } while (over0 < over1);
  317. }
  318. /* pcm[960-1023] = Wc[128-191] * block3[128-191] + Wc[0-63] * block4[0-63] + overlap[960-1023]
  319. * over[0-63] = Wc[192-255] * block3[192-255] + Wc[64-127] * block4[64-127]
  320. */
  321. out0 += 64;
  322. over0 -= 832; /* points at overlap[64] */
  323. over1 = over0 + 128 - 1; /* points at overlap[191] */
  324. buf0 += 64;
  325. buf1 = buf0 - 1;
  326. wndCurr -= 128;
  327. do {
  328. w0 = *wndCurr++; /* W[0], W[1], ...W[63] */
  329. w1 = *wndCurr++; /* W[127], W[126], ... W[64] */
  330. in = *buf0++;
  331. f0 = MULSHIFT32(w0, in);
  332. f1 = MULSHIFT32(w1, in);
  333. in = *(over0 + 768); /* from last short block */
  334. in += *(over0 + 896); /* from last full frame */
  335. *out0++ = in - f0;
  336. in = *(over1 + 768); /* from last short block */
  337. *(over1 - 128) = in + f1;
  338. in = *buf1--;
  339. *over1-- = MULSHIFT32(w0, in); /* save in overlap[128-191] */
  340. *over0++ = MULSHIFT32(w1, in); /* save in overlap[64-127] */
  341. } while (over0 < over1);
  342. /* over0 now points at overlap[128] */
  343. /* over[64-191] = Wc[128-255] * block4[128-255] + Wc[0-127] * block5[0-127]
  344. * over[192-319] = Wc[128-255] * block5[128-255] + Wc[0-127] * block6[0-127]
  345. * over[320-447] = Wc[128-255] * block6[128-255] + Wc[0-127] * block7[0-127]
  346. * over[448-576] = Wc[128-255] * block7[128-255]
  347. */
  348. for (i = 0; i < 3; i++) {
  349. over0 += 64;
  350. over1 = over0 + 128 - 1;
  351. buf0 += 64;
  352. buf1 = buf0 - 1;
  353. wndCurr -= 128;
  354. do {
  355. w0 = *wndCurr++; /* W[0], W[1], ...W[63] */
  356. w1 = *wndCurr++; /* W[127], W[126], ... W[64] */
  357. in = *buf0++;
  358. f0 = MULSHIFT32(w0, in);
  359. f1 = MULSHIFT32(w1, in);
  360. /* from last short block */
  361. *(over0 - 128) -= f0;
  362. *(over1 - 128)+= f1;
  363. in = *buf1--;
  364. *over1-- = MULSHIFT32(w0, in);
  365. *over0++ = MULSHIFT32(w1, in);
  366. } while (over0 < over1);
  367. }
  368. /* over[576-1024] = 0 */
  369. i = 448;
  370. over0 += 64;
  371. do {
  372. *over0++ = 0;
  373. *over0++ = 0;
  374. *over0++ = 0;
  375. *over0++ = 0;
  376. i -= 4;
  377. } while (i);
  378. }