assembly.h 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. /* ***** BEGIN LICENSE BLOCK *****
  2. * Version: RCSL 1.0/RPSL 1.0
  3. *
  4. * Portions Copyright (c) 1995-2002 RealNetworks, Inc. All Rights Reserved.
  5. *
  6. * The contents of this file, and the files included with this file, are
  7. * subject to the current version of the RealNetworks Public Source License
  8. * Version 1.0 (the "RPSL") available at
  9. * http://www.helixcommunity.org/content/rpsl unless you have licensed
  10. * the file under the RealNetworks Community Source License Version 1.0
  11. * (the "RCSL") available at http://www.helixcommunity.org/content/rcsl,
  12. * in which case the RCSL will apply. You may also obtain the license terms
  13. * directly from RealNetworks. You may not use this file except in
  14. * compliance with the RPSL or, if you have a valid RCSL with RealNetworks
  15. * applicable to this file, the RCSL. Please see the applicable RPSL or
  16. * RCSL for the rights, obligations and limitations governing use of the
  17. * contents of the file.
  18. *
  19. * This file is part of the Helix DNA Technology. RealNetworks is the
  20. * developer of the Original Code and owns the copyrights in the portions
  21. * it created.
  22. *
  23. * This file, and the files included with this file, is distributed and made
  24. * available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  25. * EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  26. * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
  27. * FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  28. *
  29. * Technology Compatibility Kit Test Suite(s) Location:
  30. * http://www.helixcommunity.org/content/tck
  31. *
  32. * Contributor(s):
  33. *
  34. * ***** END LICENSE BLOCK ***** */
  35. /**************************************************************************************
  36. * Fixed-point MP3 decoder
  37. * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
  38. * June 2003
  39. *
  40. * assembly.h - assembly language functions and prototypes for supported platforms
  41. *
  42. * - inline routines with access to 64-bit multiply results
  43. * - x86 (_WIN32) and ARM (ARM_ADS, _WIN32_WCE) versions included
  44. * - some inline functions are mix of asm and C for speed
  45. * - some functions are in native asm files, so only the prototype is given here
  46. *
  47. * MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y), returns top 32 bits of 64-bit result
  48. * FASTABS(x) branchless absolute value of signed integer x
  49. * CLZ(x) count leading zeros in x
  50. * MADD64(sum, x, y) (not defined on every platform) returns sum [64-bit] + x [32-bit] * y [32-bit]
  51. * SHL64(x, n) (Windows only) 64-bit left shift of x by n bits
  52. * SAR64(x, n) (not defined on every platform) 64-bit arithmetic right shift of x by n bits
  53. */
  54. #ifndef _ASSEMBLY_H
  55. #define _ASSEMBLY_H
  56. #if (defined _WIN32 && !defined _WIN32_WCE) || (defined __WINS__ && defined _SYMBIAN) || defined(_OPENWAVE_SIMULATOR) || defined(WINCE_EMULATOR) /* Symbian emulator for Ix86 */
  57. #pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */
  /*
   * MULSHIFT32 - x86 inline-asm version: top 32 bits of the signed 64-bit
   * product x * y.
   *
   * x is loaded into eax and the one-operand imul multiplies it by y, leaving
   * the 64-bit product in edx:eax. The high word (edx) is then copied to eax.
   * There is no C return statement; the value left in eax is the function
   * result (hence the warning 4035 pragma in this branch).
   */
  58. static __inline int MULSHIFT32(int x, int y)
  59. {
  60. __asm {
  61. mov eax, x
  62. imul y
  63. mov eax, edx
  64. }
  65. }
  /*
   * FASTABS - branchless absolute value of a signed integer.
   *
   * x: value whose magnitude is wanted.
   * Returns |x|.
   *
   * The work is done on unsigned copies so that no signed overflow occurs:
   * the previous signed form (x ^= sign; x -= sign) overflowed for the most
   * negative int and depended on an arithmetic right shift of a negative value.
   * For the most negative int the magnitude is not representable; the bit
   * pattern is handed back unchanged through the final conversion.
   */
  static __inline int FASTABS(int x)
  {
      unsigned int ux = (unsigned int)x;
      /* all ones when x is negative, zero otherwise */
      unsigned int mask = 0u - (ux >> (sizeof(int) * 8 - 1));

      /* (ux ^ mask) - mask is a conditional two's-complement negate */
      return (int)((ux ^ mask) - mask);
  }
  /*
   * CLZ - count leading zero bits of x.
   *
   * x: value to examine; it is treated as a raw bit pattern, so any negative
   *    value yields 0.
   * Returns the number of zero bits above the most significant set bit, or
   * sizeof(int) * 8 when x is 0.
   *
   * The scan runs on an unsigned copy: shifting a signed int left until a bit
   * reaches the sign position is undefined behavior in C.
   */
  static __inline int CLZ(int x)
  {
      unsigned int bits = (unsigned int)x;
      int numZeros = 0;

      if (!bits)
          return (int)(sizeof(int) * 8);

      while (!(bits & 0x80000000u)) {
          numZeros++;
          bits <<= 1;
      }

      return numZeros;
  }
  86. /* MADD64, SHL64, SAR64:
  87. * write in assembly to avoid dependency on run-time lib for 64-bit shifts, muls
  88. * (sometimes compiler thunks to function calls instead of code generating)
  89. * required for Symbian emulator
  90. */
  /*
   * Word64: integer type that carries the 64-bit values used by
   * MADD64/SHL64/SAR64 in this branch. It is long long when __CW32__ is
   * defined and __int64 otherwise.
   */
  91. #ifdef __CW32__
  92. typedef long long Word64;
  93. #else
  94. typedef __int64 Word64;
  95. #endif
  /*
   * MADD64 - x86 inline-asm multiply-accumulate: sum + (64-bit) x * y.
   *
   * sum is split into its two 32-bit words by indexing its storage (word 0 is
   * used as the low half, word 1 as the high half). imul leaves the signed
   * 64-bit product of x and y in edx:eax; add/adc then add the low and high
   * words of sum with carry propagation.
   * There is no C return statement; the result is whatever the asm block
   * leaves in edx:eax.
   */
  96. static __inline Word64 MADD64(Word64 sum, int x, int y)
  97. {
  98. unsigned int sumLo = ((unsigned int *)&sum)[0];
  99. int sumHi = ((int *)&sum)[1];
  100. __asm {
  101. mov eax, x
  102. imul y
  103. add eax, sumLo
  104. adc edx, sumHi
  105. }
  106. /* equivalent to return (sum + ((__int64)x * y)); */
  107. }
  /*
   * SHL64 - x86 inline-asm 64-bit left shift of x by n bits.
   *
   * x is split into low/high 32-bit words (word 0 = low, word 1 = high) and
   * the shift count is narrowed to a byte for loading into cl. Three cases:
   *   n < 32        : shld moves the top bits of the low word into the high
   *                   word, then shl shifts the low word.
   *   32 <= n < 64  : the low word becomes the high word, shifted by cl
   *                   (the existing note says shl masks cl to 0x1f, so the
   *                   effective count is n - 32); the low word is zeroed.
   *   n >= 64       : both words are zeroed.
   * The result is left in edx:eax; there is no C return statement.
   * NOTE(review): a negative n also takes the first branch (n < 32) - confirm
   * callers never pass one.
   */
  108. static __inline Word64 SHL64(Word64 x, int n)
  109. {
  110. unsigned int xLo = ((unsigned int *)&x)[0];
  111. int xHi = ((int *)&x)[1];
  112. unsigned char nb = (unsigned char)n;
  113. if (n < 32) {
  114. __asm {
  115. mov edx, xHi
  116. mov eax, xLo
  117. mov cl, nb
  118. shld edx, eax, cl
  119. shl eax, cl
  120. }
  121. } else if (n < 64) {
  122. /* shl masks cl to 0x1f */
  123. __asm {
  124. mov edx, xLo
  125. mov cl, nb
  126. xor eax, eax
  127. shl edx, cl
  128. }
  129. } else {
  130. __asm {
  131. xor edx, edx
  132. xor eax, eax
  133. }
  134. }
  135. }
  /*
   * SAR64 - x86 inline-asm 64-bit arithmetic right shift of x by n bits.
   *
   * x is split into low/high 32-bit words (word 0 = low, word 1 = high) and
   * the shift count is narrowed to a byte for loading into cl. Three cases:
   *   n < 32        : shrd moves the low bits of the high word into the low
   *                   word, then sar shifts the high word, keeping its sign.
   *   32 <= n < 64  : the high word is copied to both halves; edx is reduced
   *                   to sign fill by sar 31 and eax is shifted by cl (the
   *                   existing note says sar masks cl to 0x1f, so the
   *                   effective count is n - 32).
   *   n >= 64       : xHi is reduced to sign fill and copied to both halves.
   * The result is left in edx:eax; there is no C return statement.
   * NOTE(review): a negative n also takes the first branch (n < 32) - confirm
   * callers never pass one.
   */
  136. static __inline Word64 SAR64(Word64 x, int n)
  137. {
  138. unsigned int xLo = ((unsigned int *)&x)[0];
  139. int xHi = ((int *)&x)[1];
  140. unsigned char nb = (unsigned char)n;
  141. if (n < 32) {
  142. __asm {
  143. mov edx, xHi
  144. mov eax, xLo
  145. mov cl, nb
  146. shrd eax, edx, cl
  147. sar edx, cl
  148. }
  149. } else if (n < 64) {
  150. /* sar masks cl to 0x1f */
  151. __asm {
  152. mov edx, xHi
  153. mov eax, xHi
  154. mov cl, nb
  155. sar edx, 31
  156. sar eax, cl
  157. }
  158. } else {
  159. __asm {
  160. sar xHi, 31
  161. mov eax, xHi
  162. mov edx, xHi
  163. }
  164. }
  165. }
  166. #elif (defined _WIN32) && (defined _WIN32_WCE)
  167. /* use asm function for now (EVC++ 3.0 does horrible job compiling __int64 version) */
  /*
   * The macro renames every later use of MULSHIFT32 to xmp3_MULSHIFT32, so the
   * prototype below effectively declares xmp3_MULSHIFT32. Only the prototype is
   * given; this header does not define the function.
   */
  168. #define MULSHIFT32 xmp3_MULSHIFT32
  169. int MULSHIFT32(int x, int y);
  /*
   * FASTABS - absolute value of x without a branch.
   *
   * x: signed input.
   * Returns |x|.
   *
   * Computed on unsigned values: the earlier signed sequence
   * (x ^= sign; x -= sign) overflowed, which is undefined, when x was the most
   * negative int, and it assumed an arithmetic right shift of negative values.
   * The most negative int has no representable magnitude and comes back with
   * its bit pattern unchanged through the final conversion.
   */
  static __inline int FASTABS(int x)
  {
      unsigned int raw = (unsigned int)x;
      /* 0xffffffff for negative x, 0 for non-negative x */
      unsigned int fill = 0u - (raw >> (sizeof(int) * 8 - 1));

      return (int)((raw ^ fill) - fill);
  }
  /*
   * CLZ - number of leading zero bits in x.
   *
   * x: input, interpreted as a bit pattern (negative values give 0).
   * Returns sizeof(int) * 8 for x == 0, otherwise the count of zero bits above
   * the highest set bit.
   *
   * An unsigned copy is shifted because left-shifting a signed int into its
   * sign bit is undefined behavior.
   */
  static __inline int CLZ(int x)
  {
      unsigned int v = (unsigned int)x;
      int numZeros;

      if (v == 0)
          return (int)(sizeof(int) * 8);

      for (numZeros = 0; !(v & 0x80000000u); numZeros++)
          v <<= 1;

      return numZeros;
  }
  190. #elif defined XXXARM_ADS
  /*
   * MULSHIFT32 - inline-asm version built on smull.
   * smull writes the low word of the 64-bit product x * y into zlow (never
   * read afterwards) and the high word into y, which is then returned.
   */
  191. static __inline int MULSHIFT32(int x, int y)
  192. {
  193. /* important rules for smull RdLo, RdHi, Rm, Rs:
  194. * RdHi and Rm can't be the same register
  195. * RdLo and Rm can't be the same register
  196. * RdHi and RdLo can't be the same register
  197. * Note: Rs determines early termination (leading sign bits) so if you want to specify
  198. * which operand is Rs, put it in the SECOND argument (y)
  199. * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
  200. * which one is returned. (If this were a function call, returning y (R1) would
  201. * require an extra "mov r0, r1")
  202. */
  203. int zlow;
  204. __asm {
  205. smull zlow,y,x,y
  206. }
  207. return y;
  208. }
  /*
   * FASTABS - branchless absolute value in two instructions:
   *   t = x ^ (x asr 31)   flips every bit of x when x is negative
   *   t = t - (x asr 31)   subtracts the sign fill of x from t
   */
  209. static __inline int FASTABS(int x)
  210. {
  211. int t=0; /* initialization is not really necessary; it only avoids a compiler warning */
  212. __asm {
  213. eor t, x, x, asr #31
  214. sub t, t, x, asr #31
  215. }
  216. return t;
  217. }
  /*
   * CLZ - count leading zeros in x.
   *
   * x: input bit pattern; a value with the top bit set (any negative int)
   *    gives 0.
   * Returns sizeof(int) * 8 when x is 0, otherwise the number of zero bits
   * above the most significant set bit.
   *
   * The loop shifts an unsigned copy, since shifting a signed int left into
   * the sign bit is undefined behavior.
   */
  static __inline int CLZ(int x)
  {
      unsigned int word = (unsigned int)x;
      int numZeros = 0;

      if (!word)
          return (int)(sizeof(int) * 8);

      while ((word & 0x80000000u) == 0) {
          word <<= 1;
          numZeros++;
      }

      return numZeros;
  }
  230. #elif defined(__GNUC__) && defined(XXXX__thumb__)
  // MULSHIFT32 - GCC extended-asm version built on smull.
  // Operand %0 (zlow) receives the low word of the product and is marked
  // early-clobber ("=&r"); operand %1 (y) receives the high word. The "1"
  // input constraint places the incoming y in the same register as output %1.
  // The high word is returned; zlow is never read.
  231. static __inline int MULSHIFT32(int x, int y)
  232. {
  233. // important rules for smull RdLo, RdHi, Rm, Rs:
  234. // RdHi and Rm can't be the same register
  235. // RdLo and Rm can't be the same register
  236. // RdHi and RdLo can't be the same register
  237. // Note: Rs determines early termination (leading sign bits) so if you want to specify
  238. // which operand is Rs, put it in the SECOND argument (y)
  239. // For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
  240. // which one is returned. (If this were a function call, returning y (R1) would
  241. // require an extra "mov r0, r1")
  242. int zlow;
  243. __asm__ volatile ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y)) ;
  244. return y;
  245. }
  246. //fb
  247. #include <stdlib.h>
  /*
   * FASTABS - absolute value of x, delegated to the C library's abs().
   *
   * x: signed input.
   * Returns the value produced by abs(x).
   */
  static __inline int FASTABS(int x)
  {
      const int magnitude = abs(x);

      return magnitude;
  }
  /*
   * CLZ - count leading zeros in x using the compiler builtin.
   *
   * x: input bit pattern.
   * Returns sizeof(int) * 8 when x is 0, otherwise the builtin's count.
   *
   * __builtin_clz has an undefined result for a zero argument, so zero is
   * handled explicitly; this matches the loop-based CLZ variants in this
   * header, which return sizeof(int) * 8 for 0.
   */
  static __inline int CLZ(int x)
  {
      if (!x)
          return (int)(sizeof(int) * 8);

      return __builtin_clz((unsigned int)x);
  }
  256. //fb
  257. //mw
  258. //TODO: Check Compiler output on these.. (fb)
  259. static __inline Word64 xMADD64(Word64 sum, int x, int y)
  260. {
  261. return (sum + ((int64_t)x * y));
  262. }
  263. static __inline Word64 xHL64(Word64 x, int n)
  264. {
  265. return x << n;
  266. }
  267. static __inline Word64 xSAR64(Word64 x, int n)
  268. {
  269. return x >> n;
  270. }
  271. //mw
  272. #elif defined(__arm__)
  273. #if defined(ARM7DI)
  typedef long long Word64;

  /*
   * MULSHIFT32 - signed multiply of two 32-bit integers, returning the top
   * 32 bits of the 64-bit product (the contract stated in the file header).
   *
   * x, y: 32-bit signed factors.
   * Returns (x * y) >> 32, computed in 64 bits.
   *
   * This variant previously returned x * y, i.e. the LOW 32 bits of the
   * product, which disagrees with every other MULSHIFT32 in this header.
   * The product is now formed in Word64 and its high word is returned.
   */
  static __inline int MULSHIFT32(int x, int y) {
      return (int)(((Word64)x * (Word64)y) >> 32);
  }
  278. #else
  279. static __inline Word64 SAR64(Word64 x, int n) {
  280. return x >>= n;
  281. }
  /*
   * U64 - overlays a Word64 with a pair of 32-bit fields so the two halves
   * can be accessed separately. It is not referenced anywhere else in the
   * visible part of this header.
   */
  282. typedef union _U64 {
  283. Word64 w64;
  284. struct {
  285. /* lo32 is declared first, i.e. little-endian word order; the original note here read "x86 = little endian" although this is the __arm__ branch. NOTE(review): confirm the target's endianness. */
  286. unsigned int lo32;
  287. signed int hi32;
  288. } r;
  289. } U64;
  290. static __inline Word64 MADD64(Word64 sum64, int x, int y)
  291. {
  292. sum64 += (Word64)x * (Word64)y;
  293. return sum64;
  294. }
  /*
   * MULSHIFT32 - GCC extended-asm version built on smull.
   * Operand %0 (zlow) receives the low word of the product and is marked
   * early-clobber ("=&r"); operand %1 (y) receives the high word. The "1"
   * input constraint places the incoming y in the same register as output %1.
   * The high word is returned; zlow is never read.
   */
  295. static __inline int MULSHIFT32(int x, int y)
  296. {
  297. /* important rules for smull RdLo, RdHi, Rm, Rs:
  298. * RdHi and Rm can't be the same register
  299. * RdLo and Rm can't be the same register
  300. * RdHi and RdLo can't be the same register
  301. * Note: Rs determines early termination (leading sign bits) so if you want to specify
  302. * which operand is Rs, put it in the SECOND argument (y)
  303. * For inline assembly, x and y are not assumed to be R0, R1 so it shouldn't matter
  304. * which one is returned. (If this were a function call, returning y (R1) would
  305. * require an extra "mov r0, r1")
  306. */
  307. int zlow;
  308. __asm__ volatile ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y)) ;
  309. return y;
  310. }
  311. #endif
  /*
   * FASTABS - branchless absolute value in two instructions (GCC extended asm):
   *   eor: t = x ^ (x asr 31)   flips every bit of x when x is negative
   *   sub: t = t - (x asr 31)   subtracts the sign fill of x from t
   * Operand %0 is the output t (early-clobber), %1 is t as an input tied to
   * the same register via the "0" constraint, and %2 is x.
   */
  312. static __inline int FASTABS(int x)
  313. {
  314. int t=0; /* initialization is not really necessary; it only avoids a compiler warning */
  315. __asm__ volatile (
  316. "eor %0,%2,%2, asr #31;"
  317. "sub %0,%1,%2, asr #31;"
  318. : "=&r" (t)
  319. : "0" (t), "r" (x)
  320. );
  321. return t;
  322. }
  /*
   * CLZ - count leading zero bits of x.
   *
   * x: input, handled as a bit pattern; negative values give 0.
   * Returns sizeof(int) * 8 for 0, otherwise the number of zero bits above
   * the highest set bit.
   *
   * Shifting is done on an unsigned copy; left-shifting a signed int so that
   * a bit lands in the sign position is undefined behavior.
   */
  static __inline int CLZ(int x)
  {
      unsigned int pattern = (unsigned int)x;
      int numZeros = 0;

      if (pattern == 0)
          return (int)(sizeof(int) * 8);

      while (!(pattern & 0x80000000u)) {
          numZeros++;
          pattern <<= 1;
      }

      return numZeros;
  }
  335. #elif defined(__APPLE__) || defined(ESP_PLATFORM) || defined(__amd64__)
  /*
   * FASTABS - branchless absolute value of a signed integer.
   *
   * x: signed input.
   * Returns |x|.
   *
   * Implemented with unsigned arithmetic. The former signed sequence
   * (x ^= sign; x -= sign) had undefined behavior for the most negative int
   * (signed overflow) and relied on right-shifting a negative value being an
   * arithmetic shift. The most negative int, whose magnitude cannot be
   * represented, is returned with its bit pattern unchanged.
   */
  static __inline int FASTABS(int x)
  {
      unsigned int u = (unsigned int)x;
      /* sign bit replicated across the word: ~0u if negative, else 0 */
      unsigned int signFill = 0u - (u >> (sizeof(int) * 8 - 1));

      return (int)((u ^ signFill) - signFill);
  }
  /*
   * CLZ - count leading zeros in x.
   *
   * x: input bit pattern; any value with the top bit set gives 0.
   * Returns sizeof(int) * 8 when x is 0, otherwise the number of zero bits
   * above the most significant set bit.
   *
   * The value is copied to an unsigned int before the loop because
   * left-shifting a signed int into its sign bit is undefined behavior.
   */
  static __inline int CLZ(int x)
  {
      unsigned int remaining = (unsigned int)x;
      int numZeros;

      if (!remaining)
          return (int)(sizeof(int) * 8);

      numZeros = 0;
      while (!(remaining & 0x80000000u)) {
          remaining <<= 1;
          numZeros++;
      }

      return numZeros;
  }
  356. /* returns 64-bit value in [edx:eax] */
  357. static __inline Word64 MADD64(Word64 sum64, int x, int y)
  358. {
  359. sum64 += (Word64)x * (Word64)y;
  360. return sum64;
  361. }
  362. static __inline__ int MULSHIFT32(int x, int y)
  363. {
  364. int z;
  365. z = (Word64)x * (Word64)y >> 32;
  366. return z;
  367. }
  368. static __inline Word64 SAR64(Word64 x, int n)
  369. {
  370. return x >> n;
  371. }
  372. #else
  373. #error Unsupported platform in assembly.h
  374. #endif /* platforms */
  375. #endif /* _ASSEMBLY_H */