2
0

sbrhfgen.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. /* ***** BEGIN LICENSE BLOCK *****
  2. * Source last modified: $Id: sbrhfgen.c,v 1.2 2005/05/19 20:45:20 jrecker Exp $
  3. *
  4. * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
  5. *
  6. * The contents of this file, and the files included with this file,
  7. * are subject to the current version of the RealNetworks Public
  8. * Source License (the "RPSL") available at
  9. * http://www.helixcommunity.org/content/rpsl unless you have licensed
  10. * the file under the current version of the RealNetworks Community
  11. * Source License (the "RCSL") available at
  12. * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
  13. * will apply. You may also obtain the license terms directly from
  14. * RealNetworks. You may not use this file except in compliance with
  15. * the RPSL or, if you have a valid RCSL with RealNetworks applicable
  16. * to this file, the RCSL. Please see the applicable RPSL or RCSL for
  17. * the rights, obligations and limitations governing use of the
  18. * contents of the file.
  19. *
  20. * This file is part of the Helix DNA Technology. RealNetworks is the
  21. * developer of the Original Code and owns the copyrights in the
  22. * portions it created.
  23. *
  24. * This file, and the files included with this file, is distributed
  25. * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
  26. * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
  27. * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
  28. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
  29. * ENJOYMENT OR NON-INFRINGEMENT.
  30. *
  31. * Technology Compatibility Kit Test Suite(s) Location:
  32. * http://www.helixcommunity.org/content/tck
  33. *
  34. * Contributor(s):
  35. *
  36. * ***** END LICENSE BLOCK ***** */
  37. /**************************************************************************************
  38. * Fixed-point HE-AAC decoder
  39. * Jon Recker (jrecker@real.com)
  40. * February 2005
  41. *
  42. * sbrhfgen.c - high frequency generation for SBR
  43. **************************************************************************************/
  44. #include "sbr.h"
  45. #include "assembly.h"
  46. #define FBITS_LPCOEFS 29 /* Q29 for range of (-4, 4) */
  47. #define MAG_16 (16 * (1 << (32 - (2*(32-FBITS_LPCOEFS))))) /* i.e. 16 in Q26 format */
  48. #define RELAX_COEF 0x7ffff79c /* 1.0 / (1.0 + 1e-6), Q31 */
  49. /* newBWTab[prev invfMode][curr invfMode], format = Q31 (table 4.158)
  50. * sample file which uses all of these: al_sbr_sr_64_2_fsaac32.aac
  51. */
  52. static const int newBWTab[4][4] PROGMEM = {
  53. {0x00000000, 0x4ccccccd, 0x73333333, 0x7d70a3d7},
  54. {0x4ccccccd, 0x60000000, 0x73333333, 0x7d70a3d7},
  55. {0x00000000, 0x60000000, 0x73333333, 0x7d70a3d7},
  56. {0x00000000, 0x60000000, 0x73333333, 0x7d70a3d7},
  57. };
  58. /**************************************************************************************
  59. * Function: CVKernel1
  60. *
  61. * Description: kernel of covariance matrix calculation for p01, p11, p12, p22
  62. *
  63. * Inputs: buffer of low-freq samples, starting at time index = 0,
  64. * freq index = patch subband
  65. *
  66. * Outputs: 64-bit accumulators for p01re, p01im, p12re, p12im, p11re, p22re
  67. * stored in accBuf
  68. *
  69. * Return: none
  70. *
  71. * Notes: this is carefully written to be efficient on ARM
  72. * use the assembly code version in sbrcov.s when building for ARM!
  73. **************************************************************************************/
  74. #if (defined (XXXX__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(XXXX__arm__))
  75. #ifdef __cplusplus
  76. extern "C"
  77. #endif
  78. void CVKernel1(int *XBuf, int *accBuf);
  79. #else
  80. void CVKernel1(int *XBuf, int *accBuf)
  81. {
  82. U64 p01re, p01im, p12re, p12im, p11re, p22re;
  83. int n, x0re, x0im, x1re, x1im;
  84. x0re = XBuf[0];
  85. x0im = XBuf[1];
  86. XBuf += (2*64);
  87. x1re = XBuf[0];
  88. x1im = XBuf[1];
  89. XBuf += (2*64);
  90. p01re.w64 = p01im.w64 = 0;
  91. p12re.w64 = p12im.w64 = 0;
  92. p11re.w64 = 0;
  93. p22re.w64 = 0;
  94. p12re.w64 = MADD64(p12re.w64, x1re, x0re);
  95. p12re.w64 = MADD64(p12re.w64, x1im, x0im);
  96. p12im.w64 = MADD64(p12im.w64, x0re, x1im);
  97. p12im.w64 = MADD64(p12im.w64, -x0im, x1re);
  98. p22re.w64 = MADD64(p22re.w64, x0re, x0re);
  99. p22re.w64 = MADD64(p22re.w64, x0im, x0im);
  100. for (n = (NUM_TIME_SLOTS*SAMPLES_PER_SLOT + 6); n != 0; n--) {
  101. /* 4 input, 3*2 acc, 1 ptr, 1 loop counter = 12 registers (use same for x0im, -x0im) */
  102. x0re = x1re;
  103. x0im = x1im;
  104. x1re = XBuf[0];
  105. x1im = XBuf[1];
  106. p01re.w64 = MADD64(p01re.w64, x1re, x0re);
  107. p01re.w64 = MADD64(p01re.w64, x1im, x0im);
  108. p01im.w64 = MADD64(p01im.w64, x0re, x1im);
  109. p01im.w64 = MADD64(p01im.w64, -x0im, x1re);
  110. p11re.w64 = MADD64(p11re.w64, x0re, x0re);
  111. p11re.w64 = MADD64(p11re.w64, x0im, x0im);
  112. XBuf += (2*64);
  113. }
  114. /* these can be derived by slight changes to account for boundary conditions */
  115. p12re.w64 += p01re.w64;
  116. p12re.w64 = MADD64(p12re.w64, x1re, -x0re);
  117. p12re.w64 = MADD64(p12re.w64, x1im, -x0im);
  118. p12im.w64 += p01im.w64;
  119. p12im.w64 = MADD64(p12im.w64, x0re, -x1im);
  120. p12im.w64 = MADD64(p12im.w64, x0im, x1re);
  121. p22re.w64 += p11re.w64;
  122. p22re.w64 = MADD64(p22re.w64, x0re, -x0re);
  123. p22re.w64 = MADD64(p22re.w64, x0im, -x0im);
  124. accBuf[0] = p01re.r.lo32; accBuf[1] = p01re.r.hi32;
  125. accBuf[2] = p01im.r.lo32; accBuf[3] = p01im.r.hi32;
  126. accBuf[4] = p11re.r.lo32; accBuf[5] = p11re.r.hi32;
  127. accBuf[6] = p12re.r.lo32; accBuf[7] = p12re.r.hi32;
  128. accBuf[8] = p12im.r.lo32; accBuf[9] = p12im.r.hi32;
  129. accBuf[10] = p22re.r.lo32; accBuf[11] = p22re.r.hi32;
  130. }
  131. #endif
  132. /**************************************************************************************
  133. * Function: CalcCovariance1
  134. *
  135. * Description: calculate covariance matrix for p01, p12, p11, p22 (4.6.18.6.2)
  136. *
  137. * Inputs: buffer of low-freq samples, starting at time index 0,
  138. * freq index = patch subband
  139. *
  140. * Outputs: complex covariance elements p01re, p01im, p12re, p12im, p11re, p22re
  141. * (p11im = p22im = 0)
  142. * format = integer (Q0) * 2^N, with scalefactor N >= 0
  143. *
  144. * Return: scalefactor N
  145. *
  146. * Notes: outputs are normalized to have 1 GB (sign in at least top 2 bits)
  147. **************************************************************************************/
  148. static int CalcCovariance1(int *XBuf, int *p01reN, int *p01imN, int *p12reN, int *p12imN, int *p11reN, int *p22reN)
  149. {
  150. int accBuf[2*6];
  151. int n, z, s, loShift, hiShift, gbMask;
  152. U64 p01re, p01im, p12re, p12im, p11re, p22re;
  153. CVKernel1(XBuf, accBuf);
  154. p01re.r.lo32 = accBuf[0]; p01re.r.hi32 = accBuf[1];
  155. p01im.r.lo32 = accBuf[2]; p01im.r.hi32 = accBuf[3];
  156. p11re.r.lo32 = accBuf[4]; p11re.r.hi32 = accBuf[5];
  157. p12re.r.lo32 = accBuf[6]; p12re.r.hi32 = accBuf[7];
  158. p12im.r.lo32 = accBuf[8]; p12im.r.hi32 = accBuf[9];
  159. p22re.r.lo32 = accBuf[10]; p22re.r.hi32 = accBuf[11];
  160. /* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits
  161. * want to scale them down to integers (32-bit signed, Q0)
  162. * with scale factor of 2^n, n >= 0
  163. * leave 2 GB's for calculating determinant, so take top 30 non-zero bits
  164. */
  165. gbMask = ((p01re.r.hi32) ^ (p01re.r.hi32 >> 31)) | ((p01im.r.hi32) ^ (p01im.r.hi32 >> 31));
  166. gbMask |= ((p12re.r.hi32) ^ (p12re.r.hi32 >> 31)) | ((p12im.r.hi32) ^ (p12im.r.hi32 >> 31));
  167. gbMask |= ((p11re.r.hi32) ^ (p11re.r.hi32 >> 31)) | ((p22re.r.hi32) ^ (p22re.r.hi32 >> 31));
  168. if (gbMask == 0) {
  169. s = p01re.r.hi32 >> 31; gbMask = (p01re.r.lo32 ^ s) - s;
  170. s = p01im.r.hi32 >> 31; gbMask |= (p01im.r.lo32 ^ s) - s;
  171. s = p12re.r.hi32 >> 31; gbMask |= (p12re.r.lo32 ^ s) - s;
  172. s = p12im.r.hi32 >> 31; gbMask |= (p12im.r.lo32 ^ s) - s;
  173. s = p11re.r.hi32 >> 31; gbMask |= (p11re.r.lo32 ^ s) - s;
  174. s = p22re.r.hi32 >> 31; gbMask |= (p22re.r.lo32 ^ s) - s;
  175. z = 32 + CLZ(gbMask);
  176. } else {
  177. gbMask = FASTABS(p01re.r.hi32) | FASTABS(p01im.r.hi32);
  178. gbMask |= FASTABS(p12re.r.hi32) | FASTABS(p12im.r.hi32);
  179. gbMask |= FASTABS(p11re.r.hi32) | FASTABS(p22re.r.hi32);
  180. z = CLZ(gbMask);
  181. }
  182. n = 64 - z; /* number of non-zero bits in bottom of 64-bit word */
  183. if (n <= 30) {
  184. loShift = (30 - n);
  185. *p01reN = p01re.r.lo32 << loShift; *p01imN = p01im.r.lo32 << loShift;
  186. *p12reN = p12re.r.lo32 << loShift; *p12imN = p12im.r.lo32 << loShift;
  187. *p11reN = p11re.r.lo32 << loShift; *p22reN = p22re.r.lo32 << loShift;
  188. return -(loShift + 2*FBITS_OUT_QMFA);
  189. } else if (n < 32 + 30) {
  190. loShift = (n - 30);
  191. hiShift = 32 - loShift;
  192. *p01reN = (p01re.r.hi32 << hiShift) | (p01re.r.lo32 >> loShift);
  193. *p01imN = (p01im.r.hi32 << hiShift) | (p01im.r.lo32 >> loShift);
  194. *p12reN = (p12re.r.hi32 << hiShift) | (p12re.r.lo32 >> loShift);
  195. *p12imN = (p12im.r.hi32 << hiShift) | (p12im.r.lo32 >> loShift);
  196. *p11reN = (p11re.r.hi32 << hiShift) | (p11re.r.lo32 >> loShift);
  197. *p22reN = (p22re.r.hi32 << hiShift) | (p22re.r.lo32 >> loShift);
  198. return (loShift - 2*FBITS_OUT_QMFA);
  199. } else {
  200. hiShift = n - (32 + 30);
  201. *p01reN = p01re.r.hi32 >> hiShift; *p01imN = p01im.r.hi32 >> hiShift;
  202. *p12reN = p12re.r.hi32 >> hiShift; *p12imN = p12im.r.hi32 >> hiShift;
  203. *p11reN = p11re.r.hi32 >> hiShift; *p22reN = p22re.r.hi32 >> hiShift;
  204. return (32 - 2*FBITS_OUT_QMFA - hiShift);
  205. }
  206. return 0;
  207. }
  208. /**************************************************************************************
  209. * Function: CVKernel2
  210. *
  211. * Description: kernel of covariance matrix calculation for p02
  212. *
  213. * Inputs: buffer of low-freq samples, starting at time index = 0,
  214. * freq index = patch subband
  215. *
  216. * Outputs: 64-bit accumulators for p02re, p02im stored in accBuf
  217. *
  218. * Return: none
  219. *
  220. * Notes: this is carefully written to be efficient on ARM
  221. * use the assembly code version in sbrcov.s when building for ARM!
  222. **************************************************************************************/
  223. #if (defined (XXXX__arm) && defined (__ARMCC_VERSION)) || (defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)) || (defined(__GNUC__) && defined(XXXX__arm__))
  224. #ifdef __cplusplus
  225. extern "C"
  226. #endif
  227. void CVKernel2(int *XBuf, int *accBuf);
  228. #else
  229. void CVKernel2(int *XBuf, int *accBuf)
  230. {
  231. U64 p02re, p02im;
  232. int n, x0re, x0im, x1re, x1im, x2re, x2im;
  233. p02re.w64 = p02im.w64 = 0;
  234. x0re = XBuf[0];
  235. x0im = XBuf[1];
  236. XBuf += (2*64);
  237. x1re = XBuf[0];
  238. x1im = XBuf[1];
  239. XBuf += (2*64);
  240. for (n = (NUM_TIME_SLOTS*SAMPLES_PER_SLOT + 6); n != 0; n--) {
  241. /* 6 input, 2*2 acc, 1 ptr, 1 loop counter = 12 registers (use same for x0im, -x0im) */
  242. x2re = XBuf[0];
  243. x2im = XBuf[1];
  244. p02re.w64 = MADD64(p02re.w64, x2re, x0re);
  245. p02re.w64 = MADD64(p02re.w64, x2im, x0im);
  246. p02im.w64 = MADD64(p02im.w64, x0re, x2im);
  247. p02im.w64 = MADD64(p02im.w64, -x0im, x2re);
  248. x0re = x1re;
  249. x0im = x1im;
  250. x1re = x2re;
  251. x1im = x2im;
  252. XBuf += (2*64);
  253. }
  254. accBuf[0] = p02re.r.lo32;
  255. accBuf[1] = p02re.r.hi32;
  256. accBuf[2] = p02im.r.lo32;
  257. accBuf[3] = p02im.r.hi32;
  258. }
  259. #endif
  260. /**************************************************************************************
  261. * Function: CalcCovariance2
  262. *
  263. * Description: calculate covariance matrix for p02 (4.6.18.6.2)
  264. *
  265. * Inputs: buffer of low-freq samples, starting at time index = 0,
  266. * freq index = patch subband
  267. *
  268. * Outputs: complex covariance element p02re, p02im
  269. * format = integer (Q0) * 2^N, with scalefactor N >= 0
  270. *
  271. * Return: scalefactor N
  272. *
  273. * Notes: outputs are normalized to have 1 GB (sign in at least top 2 bits)
  274. **************************************************************************************/
  275. static int CalcCovariance2(int *XBuf, int *p02reN, int *p02imN)
  276. {
  277. U64 p02re, p02im;
  278. int n, z, s, loShift, hiShift, gbMask;
  279. int accBuf[2*2];
  280. CVKernel2(XBuf, accBuf);
  281. p02re.r.lo32 = accBuf[0];
  282. p02re.r.hi32 = accBuf[1];
  283. p02im.r.lo32 = accBuf[2];
  284. p02im.r.hi32 = accBuf[3];
  285. /* 64-bit accumulators now have 2*FBITS_OUT_QMFA fraction bits
  286. * want to scale them down to integers (32-bit signed, Q0)
  287. * with scale factor of 2^n, n >= 0
  288. * leave 1 GB for calculating determinant, so take top 30 non-zero bits
  289. */
  290. gbMask = ((p02re.r.hi32) ^ (p02re.r.hi32 >> 31)) | ((p02im.r.hi32) ^ (p02im.r.hi32 >> 31));
  291. if (gbMask == 0) {
  292. s = p02re.r.hi32 >> 31; gbMask = (p02re.r.lo32 ^ s) - s;
  293. s = p02im.r.hi32 >> 31; gbMask |= (p02im.r.lo32 ^ s) - s;
  294. z = 32 + CLZ(gbMask);
  295. } else {
  296. gbMask = FASTABS(p02re.r.hi32) | FASTABS(p02im.r.hi32);
  297. z = CLZ(gbMask);
  298. }
  299. n = 64 - z; /* number of non-zero bits in bottom of 64-bit word */
  300. if (n <= 30) {
  301. loShift = (30 - n);
  302. *p02reN = p02re.r.lo32 << loShift;
  303. *p02imN = p02im.r.lo32 << loShift;
  304. return -(loShift + 2*FBITS_OUT_QMFA);
  305. } else if (n < 32 + 30) {
  306. loShift = (n - 30);
  307. hiShift = 32 - loShift;
  308. *p02reN = (p02re.r.hi32 << hiShift) | (p02re.r.lo32 >> loShift);
  309. *p02imN = (p02im.r.hi32 << hiShift) | (p02im.r.lo32 >> loShift);
  310. return (loShift - 2*FBITS_OUT_QMFA);
  311. } else {
  312. hiShift = n - (32 + 30);
  313. *p02reN = p02re.r.hi32 >> hiShift;
  314. *p02imN = p02im.r.hi32 >> hiShift;
  315. return (32 - 2*FBITS_OUT_QMFA - hiShift);
  316. }
  317. return 0;
  318. }
  319. /**************************************************************************************
  320. * Function: CalcLPCoefs
  321. *
  322. * Description: calculate linear prediction coefficients for one subband (4.6.18.6.2)
  323. *
  324. * Inputs: buffer of low-freq samples, starting at time index = 0,
  325. * freq index = patch subband
  326. * number of guard bits in input sample buffer
  327. *
  328. * Outputs: complex LP coefficients a0re, a0im, a1re, a1im, format = Q29
  329. *
  330. * Return: none
  331. *
  332. * Notes: output coefficients (a0re, a0im, a1re, a1im) clipped to range (-4, 4)
  333. * if the comples coefficients have magnitude >= 4.0, they are all
  334. * set to 0 (see spec)
  335. **************************************************************************************/
  336. static void CalcLPCoefs(int *XBuf, int *a0re, int *a0im, int *a1re, int *a1im, int gb)
  337. {
  338. int zFlag, n1, n2, nd, d, dInv, tre, tim;
  339. int p01re, p01im, p02re, p02im, p12re, p12im, p11re, p22re;
  340. /* pre-scale to avoid overflow - probably never happens in practice (see QMFA)
  341. * max bit growth per accumulator = 38*2 = 76 mul-adds (X * X)
  342. * using 64-bit MADD, so if X has n guard bits, X*X has 2n+1 guard bits
  343. * gain 1 extra sign bit per multiply, so ensure ceil(log2(76/2) / 2) = 3 guard bits on inputs
  344. */
  345. if (gb < 3) {
  346. nd = 3 - gb;
  347. for (n1 = (NUM_TIME_SLOTS*SAMPLES_PER_SLOT + 6 + 2); n1 != 0; n1--) {
  348. XBuf[0] >>= nd; XBuf[1] >>= nd;
  349. XBuf += (2*64);
  350. }
  351. XBuf -= (2*64*(NUM_TIME_SLOTS*SAMPLES_PER_SLOT + 6 + 2));
  352. }
  353. /* calculate covariance elements */
  354. n1 = CalcCovariance1(XBuf, &p01re, &p01im, &p12re, &p12im, &p11re, &p22re);
  355. n2 = CalcCovariance2(XBuf, &p02re, &p02im);
  356. /* normalize everything to larger power of 2 scalefactor, call it n1 */
  357. if (n1 < n2) {
  358. nd = MIN(n2 - n1, 31);
  359. p01re >>= nd; p01im >>= nd;
  360. p12re >>= nd; p12im >>= nd;
  361. p11re >>= nd; p22re >>= nd;
  362. n1 = n2;
  363. } else if (n1 > n2) {
  364. nd = MIN(n1 - n2, 31);
  365. p02re >>= nd; p02im >>= nd;
  366. }
  367. /* calculate determinant of covariance matrix (at least 1 GB in pXX) */
  368. d = MULSHIFT32(p12re, p12re) + MULSHIFT32(p12im, p12im);
  369. d = MULSHIFT32(d, RELAX_COEF) << 1;
  370. d = MULSHIFT32(p11re, p22re) - d;
  371. ASSERT(d >= 0); /* should never be < 0 */
  372. zFlag = 0;
  373. *a0re = *a0im = 0;
  374. *a1re = *a1im = 0;
  375. if (d > 0) {
  376. /* input = Q31 d = Q(-2*n1 - 32 + nd) = Q31 * 2^(31 + 2*n1 + 32 - nd)
  377. * inverse = Q29 dInv = Q29 * 2^(-31 - 2*n1 - 32 + nd) = Q(29 + 31 + 2*n1 + 32 - nd)
  378. *
  379. * numerator has same Q format as d, since it's sum of normalized squares
  380. * so num * inverse = Q(-2*n1 - 32) * Q(29 + 31 + 2*n1 + 32 - nd)
  381. * = Q(29 + 31 - nd), drop low 32 in MULSHIFT32
  382. * = Q(29 + 31 - 32 - nd) = Q(28 - nd)
  383. */
  384. nd = CLZ(d) - 1;
  385. d <<= nd;
  386. dInv = InvRNormalized(d);
  387. /* 1 GB in pXX */
  388. tre = MULSHIFT32(p01re, p12re) - MULSHIFT32(p01im, p12im) - MULSHIFT32(p02re, p11re);
  389. tre = MULSHIFT32(tre, dInv);
  390. tim = MULSHIFT32(p01re, p12im) + MULSHIFT32(p01im, p12re) - MULSHIFT32(p02im, p11re);
  391. tim = MULSHIFT32(tim, dInv);
  392. /* if d is extremely small, just set coefs to 0 (would have poor precision anyway) */
  393. if (nd > 28 || (FASTABS(tre) >> (28 - nd)) >= 4 || (FASTABS(tim) >> (28 - nd)) >= 4) {
  394. zFlag = 1;
  395. } else {
  396. *a1re = tre << (FBITS_LPCOEFS - 28 + nd); /* i.e. convert Q(28 - nd) to Q(29) */
  397. *a1im = tim << (FBITS_LPCOEFS - 28 + nd);
  398. }
  399. }
  400. if (p11re) {
  401. /* input = Q31 p11re = Q(-n1 + nd) = Q31 * 2^(31 + n1 - nd)
  402. * inverse = Q29 dInv = Q29 * 2^(-31 - n1 + nd) = Q(29 + 31 + n1 - nd)
  403. *
  404. * numerator is Q(-n1 - 3)
  405. * so num * inverse = Q(-n1 - 3) * Q(29 + 31 + n1 - nd)
  406. * = Q(29 + 31 - 3 - nd), drop low 32 in MULSHIFT32
  407. * = Q(29 + 31 - 3 - 32 - nd) = Q(25 - nd)
  408. */
  409. nd = CLZ(p11re) - 1; /* assume positive */
  410. p11re <<= nd;
  411. dInv = InvRNormalized(p11re);
  412. /* a1re, a1im = Q29, so scaled by (n1 + 3) */
  413. tre = (p01re >> 3) + MULSHIFT32(p12re, *a1re) + MULSHIFT32(p12im, *a1im);
  414. tre = -MULSHIFT32(tre, dInv);
  415. tim = (p01im >> 3) - MULSHIFT32(p12im, *a1re) + MULSHIFT32(p12re, *a1im);
  416. tim = -MULSHIFT32(tim, dInv);
  417. if (nd > 25 || (FASTABS(tre) >> (25 - nd)) >= 4 || (FASTABS(tim) >> (25 - nd)) >= 4) {
  418. zFlag = 1;
  419. } else {
  420. *a0re = tre << (FBITS_LPCOEFS - 25 + nd); /* i.e. convert Q(25 - nd) to Q(29) */
  421. *a0im = tim << (FBITS_LPCOEFS - 25 + nd);
  422. }
  423. }
  424. /* see 4.6.18.6.2 - if magnitude of a0 or a1 >= 4 then a0 = a1 = 0
  425. * i.e. a0re < 4, a0im < 4, a1re < 4, a1im < 4
  426. * Q29*Q29 = Q26
  427. */
  428. if (zFlag || MULSHIFT32(*a0re, *a0re) + MULSHIFT32(*a0im, *a0im) >= MAG_16 || MULSHIFT32(*a1re, *a1re) + MULSHIFT32(*a1im, *a1im) >= MAG_16) {
  429. *a0re = *a0im = 0;
  430. *a1re = *a1im = 0;
  431. }
  432. /* no need to clip - we never changed the XBuf data, just used it to calculate a0 and a1 */
  433. if (gb < 3) {
  434. nd = 3 - gb;
  435. for (n1 = (NUM_TIME_SLOTS*SAMPLES_PER_SLOT + 6 + 2); n1 != 0; n1--) {
  436. XBuf[0] <<= nd; XBuf[1] <<= nd;
  437. XBuf += (2*64);
  438. }
  439. }
  440. }
  441. /**************************************************************************************
  442. * Function: GenerateHighFreq
  443. *
  444. * Description: generate high frequencies with SBR (4.6.18.6)
  445. *
  446. * Inputs: initialized PSInfoSBR struct
  447. * initialized SBRGrid struct for this channel
  448. * initialized SBRFreq struct for this SCE/CPE block
  449. * initialized SBRChan struct for this channel
  450. * index of current channel (0 for SCE, 0 or 1 for CPE)
  451. *
  452. * Outputs: new high frequency samples starting at frequency kStart
  453. *
  454. * Return: none
  455. **************************************************************************************/
  456. void GenerateHighFreq(PSInfoSBR *psi, SBRGrid *sbrGrid, SBRFreq *sbrFreq, SBRChan *sbrChan, int ch)
  457. {
  458. int band, newBW, c, t, gb, gbMask, gbIdx;
  459. int currPatch, p, x, k, g, i, iStart, iEnd, bw, bwsq;
  460. int a0re, a0im, a1re, a1im;
  461. int x1re, x1im, x2re, x2im;
  462. int ACCre, ACCim;
  463. int *XBufLo, *XBufHi;
  464. (void) ch;
  465. /* calculate array of chirp factors */
  466. for (band = 0; band < sbrFreq->numNoiseFloorBands; band++) {
  467. c = sbrChan->chirpFact[band]; /* previous (bwArray') */
  468. newBW = newBWTab[sbrChan->invfMode[0][band]][sbrChan->invfMode[1][band]];
  469. /* weighted average of new and old (can't overflow - total gain = 1.0) */
  470. if (newBW < c)
  471. t = MULSHIFT32(newBW, 0x60000000) + MULSHIFT32(0x20000000, c); /* new is smaller: 0.75*new + 0.25*old */
  472. else
  473. t = MULSHIFT32(newBW, 0x74000000) + MULSHIFT32(0x0c000000, c); /* new is larger: 0.90625*new + 0.09375*old */
  474. t <<= 1;
  475. if (t < 0x02000000) /* below 0.015625, clip to 0 */
  476. t = 0;
  477. if (t > 0x7f800000) /* clip to 0.99609375 */
  478. t = 0x7f800000;
  479. /* save curr as prev for next time */
  480. sbrChan->chirpFact[band] = t;
  481. sbrChan->invfMode[0][band] = sbrChan->invfMode[1][band];
  482. }
  483. iStart = sbrGrid->envTimeBorder[0] + HF_ADJ;
  484. iEnd = sbrGrid->envTimeBorder[sbrGrid->numEnv] + HF_ADJ;
  485. /* generate new high freqs from low freqs, patches, and chirp factors */
  486. k = sbrFreq->kStart;
  487. g = 0;
  488. bw = sbrChan->chirpFact[g];
  489. bwsq = MULSHIFT32(bw, bw) << 1;
  490. gbMask = (sbrChan->gbMask[0] | sbrChan->gbMask[1]); /* older 32 | newer 8 */
  491. gb = CLZ(gbMask) - 1;
  492. for (currPatch = 0; currPatch < sbrFreq->numPatches; currPatch++) {
  493. for (x = 0; x < sbrFreq->patchNumSubbands[currPatch]; x++) {
  494. /* map k to corresponding noise floor band */
  495. if (k >= sbrFreq->freqNoise[g+1]) {
  496. g++;
  497. bw = sbrChan->chirpFact[g]; /* Q31 */
  498. bwsq = MULSHIFT32(bw, bw) << 1; /* Q31 */
  499. }
  500. p = sbrFreq->patchStartSubband[currPatch] + x; /* low QMF band */
  501. XBufHi = psi->XBuf[iStart][k];
  502. if (bw) {
  503. CalcLPCoefs(psi->XBuf[0][p], &a0re, &a0im, &a1re, &a1im, gb);
  504. a0re = MULSHIFT32(bw, a0re); /* Q31 * Q29 = Q28 */
  505. a0im = MULSHIFT32(bw, a0im);
  506. a1re = MULSHIFT32(bwsq, a1re);
  507. a1im = MULSHIFT32(bwsq, a1im);
  508. XBufLo = psi->XBuf[iStart-2][p];
  509. x2re = XBufLo[0]; /* RE{XBuf[n-2]} */
  510. x2im = XBufLo[1]; /* IM{XBuf[n-2]} */
  511. XBufLo += (64*2);
  512. x1re = XBufLo[0]; /* RE{XBuf[n-1]} */
  513. x1im = XBufLo[1]; /* IM{XBuf[n-1]} */
  514. XBufLo += (64*2);
  515. for (i = iStart; i < iEnd; i++) {
  516. /* a0re/im, a1re/im are Q28 with at least 1 GB,
  517. * so the summing for AACre/im is fine (1 GB in, plus 1 from MULSHIFT32)
  518. */
  519. ACCre = MULSHIFT32(x2re, a1re) - MULSHIFT32(x2im, a1im);
  520. ACCim = MULSHIFT32(x2re, a1im) + MULSHIFT32(x2im, a1re);
  521. x2re = x1re;
  522. x2im = x1im;
  523. ACCre += MULSHIFT32(x1re, a0re) - MULSHIFT32(x1im, a0im);
  524. ACCim += MULSHIFT32(x1re, a0im) + MULSHIFT32(x1im, a0re);
  525. x1re = XBufLo[0]; /* RE{XBuf[n]} */
  526. x1im = XBufLo[1]; /* IM{XBuf[n]} */
  527. XBufLo += (64*2);
  528. /* lost 4 fbits when scaling by a0re/im, a1re/im (Q28) */
  529. CLIP_2N_SHIFT30(ACCre, 4);
  530. ACCre += x1re;
  531. CLIP_2N_SHIFT30(ACCim, 4);
  532. ACCim += x1im;
  533. XBufHi[0] = ACCre;
  534. XBufHi[1] = ACCim;
  535. XBufHi += (64*2);
  536. /* update guard bit masks */
  537. gbMask = FASTABS(ACCre);
  538. gbMask |= FASTABS(ACCim);
  539. gbIdx = (i >> 5) & 0x01; /* 0 if i < 32, 1 if i >= 32 */
  540. sbrChan->gbMask[gbIdx] |= gbMask;
  541. }
  542. } else {
  543. XBufLo = (int *)psi->XBuf[iStart][p];
  544. for (i = iStart; i < iEnd; i++) {
  545. XBufHi[0] = XBufLo[0];
  546. XBufHi[1] = XBufLo[1];
  547. XBufLo += (64*2);
  548. XBufHi += (64*2);
  549. }
  550. }
  551. k++; /* high QMF band */
  552. }
  553. }
  554. }