dp_dec.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /*
  2. * Copyright (c) 2011 Apple Inc. All rights reserved.
  3. *
  4. * @APPLE_APACHE_LICENSE_HEADER_START@
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. *
  18. * @APPLE_APACHE_LICENSE_HEADER_END@
  19. */
  20. /*
  21. File: dp_dec.c
  22. Contains: Dynamic Predictor decode routines
  23. Copyright: (c) 2001-2011 Apple, Inc.
  24. */
  25. #include "dplib.h"
  26. #include <string.h>
  /*
   * ALWAYS_INLINE
   * Expands to the always_inline function attribute when __GNUC__ is defined
   * and non-zero; expands to nothing for every other compiler.
   */
  #if defined(__GNUC__) && __GNUC__
      #define ALWAYS_INLINE __attribute__((always_inline))
  #else
      #define ALWAYS_INLINE
  #endif

  /*
   * LOOP_ALIGN
   * Placed at the top of the hot loop bodies in this file. Only the
   * PowerPC + Metrowerks (>= 0x3200) configuration gives it a body; everywhere
   * else it expands to nothing.
   */
  #if defined(TARGET_CPU_PPC) && TARGET_CPU_PPC && defined(__MWERKS__) && (__MWERKS__ >= 0x3200)
      // align loops to a 16 byte boundary to make the G5 happy
      #pragma function_align 16
      #define LOOP_ALIGN asm { align 16 }
  #else
      #define LOOP_ALIGN
  #endif
  /*
   * sign_of_int
   *
   * Returns the sign of i as an integer:
   *    1  when i > 0
   *    0  when i == 0
   *   -1  when i < 0
   *
   * Written with comparisons rather than negate-and-shift: negating INT32_MIN
   * overflows (undefined behavior) and right-shifting a negative value is
   * implementation-defined. Each comparison yields exactly 0 or 1, so the
   * subtraction is well defined for every int32_t input.
   */
  static inline int32_t ALWAYS_INLINE sign_of_int( int32_t i )
  {
      return (int32_t)(i > 0) - (int32_t)(i < 0);
  }
  45. void unpc_block( int32_t * pc1, int32_t * out, int32_t num, int16_t * coefs, int32_t numactive, uint32_t chanbits, uint32_t denshift )
  46. {
  47. register int16_t a0, a1, a2, a3;
  48. register int32_t b0, b1, b2, b3;
  49. int32_t j, k, lim;
  50. int32_t sum1, sg, sgn, top, dd;
  51. int32_t * pout;
  52. int32_t del, del0;
  53. uint32_t chanshift = 32 - chanbits;
  54. int32_t denhalf = 1<<(denshift-1);
  55. out[0] = pc1[0];
  56. if ( numactive == 0 )
  57. {
  58. // just copy if numactive == 0 (but don't bother if in/out pointers the same)
  59. if ( (num > 1) && (pc1 != out) )
  60. memcpy( &out[1], &pc1[1], (num - 1) * sizeof(int32_t) );
  61. return;
  62. }
  63. if ( numactive == 31 )
  64. {
  65. // short-circuit if numactive == 31
  66. int32_t prev;
  67. /* this code is written such that the in/out buffers can be the same
  68. to conserve buffer space on embedded devices like the iPod
  69. (original code)
  70. for ( j = 1; j < num; j++ )
  71. del = pc1[j] + out[j-1];
  72. out[j] = (del << chanshift) >> chanshift;
  73. */
  74. prev = out[0];
  75. for ( j = 1; j < num; j++ )
  76. {
  77. del = pc1[j] + prev;
  78. prev = (del << chanshift) >> chanshift;
  79. out[j] = prev;
  80. }
  81. return;
  82. }
  83. for ( j = 1; j <= numactive; j++ )
  84. {
  85. del = pc1[j] + out[j-1];
  86. out[j] = (del << chanshift) >> chanshift;
  87. }
  88. lim = numactive + 1;
  89. if ( numactive == 4 )
  90. {
  91. // optimization for numactive == 4
  92. register int16_t a0, a1, a2, a3;
  93. register int32_t b0, b1, b2, b3;
  94. a0 = coefs[0];
  95. a1 = coefs[1];
  96. a2 = coefs[2];
  97. a3 = coefs[3];
  98. for ( j = lim; j < num; j++ )
  99. {
  100. LOOP_ALIGN
  101. top = out[j - lim];
  102. pout = out + j - 1;
  103. b0 = top - pout[0];
  104. b1 = top - pout[-1];
  105. b2 = top - pout[-2];
  106. b3 = top - pout[-3];
  107. sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3) >> denshift;
  108. del = pc1[j];
  109. del0 = del;
  110. sg = sign_of_int(del);
  111. del += top + sum1;
  112. out[j] = (del << chanshift) >> chanshift;
  113. if ( sg > 0 )
  114. {
  115. sgn = sign_of_int( b3 );
  116. a3 -= sgn;
  117. del0 -= (4 - 3) * ((sgn * b3) >> denshift);
  118. if ( del0 <= 0 )
  119. continue;
  120. sgn = sign_of_int( b2 );
  121. a2 -= sgn;
  122. del0 -= (4 - 2) * ((sgn * b2) >> denshift);
  123. if ( del0 <= 0 )
  124. continue;
  125. sgn = sign_of_int( b1 );
  126. a1 -= sgn;
  127. del0 -= (4 - 1) * ((sgn * b1) >> denshift);
  128. if ( del0 <= 0 )
  129. continue;
  130. a0 -= sign_of_int( b0 );
  131. }
  132. else if ( sg < 0 )
  133. {
  134. // note: to avoid unnecessary negations, we flip the value of "sgn"
  135. sgn = -sign_of_int( b3 );
  136. a3 -= sgn;
  137. del0 -= (4 - 3) * ((sgn * b3) >> denshift);
  138. if ( del0 >= 0 )
  139. continue;
  140. sgn = -sign_of_int( b2 );
  141. a2 -= sgn;
  142. del0 -= (4 - 2) * ((sgn * b2) >> denshift);
  143. if ( del0 >= 0 )
  144. continue;
  145. sgn = -sign_of_int( b1 );
  146. a1 -= sgn;
  147. del0 -= (4 - 1) * ((sgn * b1) >> denshift);
  148. if ( del0 >= 0 )
  149. continue;
  150. a0 += sign_of_int( b0 );
  151. }
  152. }
  153. coefs[0] = a0;
  154. coefs[1] = a1;
  155. coefs[2] = a2;
  156. coefs[3] = a3;
  157. }
  158. else if ( numactive == 8 )
  159. {
  160. register int16_t a4, a5, a6, a7;
  161. register int32_t b4, b5, b6, b7;
  162. // optimization for numactive == 8
  163. a0 = coefs[0];
  164. a1 = coefs[1];
  165. a2 = coefs[2];
  166. a3 = coefs[3];
  167. a4 = coefs[4];
  168. a5 = coefs[5];
  169. a6 = coefs[6];
  170. a7 = coefs[7];
  171. for ( j = lim; j < num; j++ )
  172. {
  173. LOOP_ALIGN
  174. top = out[j - lim];
  175. pout = out + j - 1;
  176. b0 = top - (*pout--);
  177. b1 = top - (*pout--);
  178. b2 = top - (*pout--);
  179. b3 = top - (*pout--);
  180. b4 = top - (*pout--);
  181. b5 = top - (*pout--);
  182. b6 = top - (*pout--);
  183. b7 = top - (*pout);
  184. pout += 8;
  185. sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3
  186. - a4 * b4 - a5 * b5 - a6 * b6 - a7 * b7) >> denshift;
  187. del = pc1[j];
  188. del0 = del;
  189. sg = sign_of_int(del);
  190. del += top + sum1;
  191. out[j] = (del << chanshift) >> chanshift;
  192. if ( sg > 0 )
  193. {
  194. sgn = sign_of_int( b7 );
  195. a7 -= sgn;
  196. del0 -= 1 * ((sgn * b7) >> denshift);
  197. if ( del0 <= 0 )
  198. continue;
  199. sgn = sign_of_int( b6 );
  200. a6 -= sgn;
  201. del0 -= 2 * ((sgn * b6) >> denshift);
  202. if ( del0 <= 0 )
  203. continue;
  204. sgn = sign_of_int( b5 );
  205. a5 -= sgn;
  206. del0 -= 3 * ((sgn * b5) >> denshift);
  207. if ( del0 <= 0 )
  208. continue;
  209. sgn = sign_of_int( b4 );
  210. a4 -= sgn;
  211. del0 -= 4 * ((sgn * b4) >> denshift);
  212. if ( del0 <= 0 )
  213. continue;
  214. sgn = sign_of_int( b3 );
  215. a3 -= sgn;
  216. del0 -= 5 * ((sgn * b3) >> denshift);
  217. if ( del0 <= 0 )
  218. continue;
  219. sgn = sign_of_int( b2 );
  220. a2 -= sgn;
  221. del0 -= 6 * ((sgn * b2) >> denshift);
  222. if ( del0 <= 0 )
  223. continue;
  224. sgn = sign_of_int( b1 );
  225. a1 -= sgn;
  226. del0 -= 7 * ((sgn * b1) >> denshift);
  227. if ( del0 <= 0 )
  228. continue;
  229. a0 -= sign_of_int( b0 );
  230. }
  231. else if ( sg < 0 )
  232. {
  233. // note: to avoid unnecessary negations, we flip the value of "sgn"
  234. sgn = -sign_of_int( b7 );
  235. a7 -= sgn;
  236. del0 -= 1 * ((sgn * b7) >> denshift);
  237. if ( del0 >= 0 )
  238. continue;
  239. sgn = -sign_of_int( b6 );
  240. a6 -= sgn;
  241. del0 -= 2 * ((sgn * b6) >> denshift);
  242. if ( del0 >= 0 )
  243. continue;
  244. sgn = -sign_of_int( b5 );
  245. a5 -= sgn;
  246. del0 -= 3 * ((sgn * b5) >> denshift);
  247. if ( del0 >= 0 )
  248. continue;
  249. sgn = -sign_of_int( b4 );
  250. a4 -= sgn;
  251. del0 -= 4 * ((sgn * b4) >> denshift);
  252. if ( del0 >= 0 )
  253. continue;
  254. sgn = -sign_of_int( b3 );
  255. a3 -= sgn;
  256. del0 -= 5 * ((sgn * b3) >> denshift);
  257. if ( del0 >= 0 )
  258. continue;
  259. sgn = -sign_of_int( b2 );
  260. a2 -= sgn;
  261. del0 -= 6 * ((sgn * b2) >> denshift);
  262. if ( del0 >= 0 )
  263. continue;
  264. sgn = -sign_of_int( b1 );
  265. a1 -= sgn;
  266. del0 -= 7 * ((sgn * b1) >> denshift);
  267. if ( del0 >= 0 )
  268. continue;
  269. a0 += sign_of_int( b0 );
  270. }
  271. }
  272. coefs[0] = a0;
  273. coefs[1] = a1;
  274. coefs[2] = a2;
  275. coefs[3] = a3;
  276. coefs[4] = a4;
  277. coefs[5] = a5;
  278. coefs[6] = a6;
  279. coefs[7] = a7;
  280. }
  281. else
  282. {
  283. // general case
  284. for ( j = lim; j < num; j++ )
  285. {
  286. LOOP_ALIGN
  287. sum1 = 0;
  288. pout = out + j - 1;
  289. top = out[j-lim];
  290. for ( k = 0; k < numactive; k++ )
  291. sum1 += coefs[k] * (pout[-k] - top);
  292. del = pc1[j];
  293. del0 = del;
  294. sg = sign_of_int( del );
  295. del += top + ((sum1 + denhalf) >> denshift);
  296. out[j] = (del << chanshift) >> chanshift;
  297. if ( sg > 0 )
  298. {
  299. for ( k = (numactive - 1); k >= 0; k-- )
  300. {
  301. dd = top - pout[-k];
  302. sgn = sign_of_int( dd );
  303. coefs[k] -= sgn;
  304. del0 -= (numactive - k) * ((sgn * dd) >> denshift);
  305. if ( del0 <= 0 )
  306. break;
  307. }
  308. }
  309. else if ( sg < 0 )
  310. {
  311. for ( k = (numactive - 1); k >= 0; k-- )
  312. {
  313. dd = top - pout[-k];
  314. sgn = sign_of_int( dd );
  315. coefs[k] += sgn;
  316. del0 -= (numactive - k) * ((-sgn * dd) >> denshift);
  317. if ( del0 >= 0 )
  318. break;
  319. }
  320. }
  321. }
  322. }
  323. }