123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638 |
- /* ***** BEGIN LICENSE BLOCK *****
- * Source last modified: $Id: assembly.h,v 1.7 2005/11/10 00:04:40 margotm Exp $
- *
- * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
- *
- * The contents of this file, and the files included with this file,
- * are subject to the current version of the RealNetworks Public
- * Source License (the "RPSL") available at
- * http://www.helixcommunity.org/content/rpsl unless you have licensed
- * the file under the current version of the RealNetworks Community
- * Source License (the "RCSL") available at
- * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
- * will apply. You may also obtain the license terms directly from
- * RealNetworks. You may not use this file except in compliance with
- * the RPSL or, if you have a valid RCSL with RealNetworks applicable
- * to this file, the RCSL. Please see the applicable RPSL or RCSL for
- * the rights, obligations and limitations governing use of the
- * contents of the file.
- *
- * This file is part of the Helix DNA Technology. RealNetworks is the
- * developer of the Original Code and owns the copyrights in the
- * portions it created.
- *
- * This file, and the files included with this file, is distributed
- * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
- * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
- * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
- * ENJOYMENT OR NON-INFRINGEMENT.
- *
- * Technology Compatibility Kit Test Suite(s) Location:
- * http://www.helixcommunity.org/content/tck
- *
- * Contributor(s):
- *
- * ***** END LICENSE BLOCK ***** */
- /**************************************************************************************
- * Fixed-point HE-AAC decoder
- * Jon Recker (jrecker@real.com)
- * February 2005
- *
- * assembly.h - inline assembly language functions and prototypes
- *
- * MULSHIFT32(x, y) signed multiply of two 32-bit integers (x and y),
- * returns top 32-bits of 64-bit result
- * CLIPTOSHORT(x) convert 32-bit integer to 16-bit short,
- * clipping to [-32768, 32767]
- * FASTABS(x) branchless absolute value of signed integer x
- * CLZ(x) count leading zeros on signed integer x
- * MADD64(sum64, x, y) 64-bit multiply accumulate: sum64 += (x*y)
- **************************************************************************************/
- #ifndef _ASSEMBLY_H
- #define _ASSEMBLY_H
- /* toolchain: MSFT Visual C++
- * target architecture: x86
- */
- #if (defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR))
- #pragma warning( disable : 4035 ) /* complains about inline asm not returning a value */
- static __inline int MULSHIFT32(int x, int y) /* MSVC/x86: signed multiply of the 32-bit integers x and y, yielding the top 32 bits of the 64-bit product */
- { /* there is no C return statement: the result is whatever the asm leaves in EAX, which is why warning C4035 is disabled for this section */
- __asm {
- mov eax, x
- imul y
- mov eax, edx
- }
- } /* the one-operand IMUL leaves the product in EDX:EAX; the final MOV copies the high half (EDX) into EAX */
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short with saturation.
 *
 * x:       value to convert (int is assumed to be 32 bits wide)
 * returns: x when it already lies in [-32768, 32767], otherwise the nearest
 *          bound (32767 on positive overflow, -32768 on negative overflow)
 */
static __inline short CLIPTOSHORT(int x)
{
    /* -1 when x is negative, 0 otherwise (relies on arithmetic right shift) */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 15..31 all equal the sign bit */
    if ((x >> 15) == signMask)
        return (short)x;

    /* saturate: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ 0x7fff);
}
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       input value
 * returns: |x|; for the most negative int, whose magnitude is not
 *          representable, the input value itself comes back
 *
 * The xor/subtract is carried out on unsigned values so that the
 * most-negative input wraps instead of overflowing a signed int, which is
 * undefined behaviour.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift) */
    unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* two's-complement negate when mask is all ones, identity when it is 0 */
    return (int)(((unsigned int)x ^ mask) - mask);
}
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       input, treated as a 32-bit pattern (the sign bit counts as bit 31)
 * returns: number of zero bits above the highest set bit, in [0, 32];
 *          32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (bits == 0)
        return 32;

    /* binary search: whenever the top half of the remaining window is empty,
     * count it and slide the lower half up into its place */
    numZeros = 1;
    if (!(bits >> 16)) { numZeros += 16; bits <<= 16; }
    if (!(bits >> 24)) { numZeros += 8;  bits <<= 8;  }
    if (!(bits >> 28)) { numZeros += 4;  bits <<= 4;  }
    if (!(bits >> 30)) { numZeros += 2;  bits <<= 2;  }

    /* the count started at 1; take it back if the top bit is actually set */
    numZeros -= (int)(bits >> 31);
    return numZeros;
}
- #ifdef __CW32__
- typedef long long Word64; /* 64-bit signed integer when __CW32__ is defined (presumably a CodeWarrior build without __int64 - TODO confirm) */
- #else
- typedef __int64 Word64; /* 64-bit signed integer via the __int64 keyword */
- #endif
- typedef union _U64 { /* views one Word64 either whole (w64) or as two 32-bit halves (r) */
- Word64 w64; /* the full 64-bit value */
- struct {
- /* x86 = little endian */
- unsigned int lo32; /* declared first, so it overlays the low-order word on this little-endian target */
- signed int hi32; /* declared second: the high-order word, which carries the sign */
- } r;
- } U64;
- /* returns 64-bit value in [edx:eax] */
- static __inline Word64 MADD64(Word64 sum64, int x, int y) /* 64-bit multiply-accumulate: returns sum64 + (x * y) */
- { /* sum64 is a by-value copy; both paths below update it in place and then return it */
- #if (defined (_SYMBIAN_61_) || defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
- /* Workaround for the Symbian emulator because of non existing longlong.lib and
- * hence __allmul not defined. */
- __asm {
- mov eax, x
- imul y
- add dword ptr sum64, eax
- adc dword ptr sum64 + 4, edx
- }
- #else
- sum64 += (Word64)x * (Word64)y; /* both factors are widened first so the multiply is carried out in 64 bits */
- #endif
- return sum64; /* in the asm path IMUL leaves x*y in EDX:EAX and ADD/ADC add it to the low/high dwords of sum64 */
- }
- /* toolchain: MSFT Embedded Visual C++
- * target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier)
- */
- #elif defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short with saturation.
 *
 * x:       value to convert (int is assumed to be 32 bits wide)
 * returns: x when it already lies in [-32768, 32767], otherwise the nearest
 *          bound (32767 on positive overflow, -32768 on negative overflow)
 */
static __inline short CLIPTOSHORT(int x)
{
    /* -1 when x is negative, 0 otherwise (relies on arithmetic right shift) */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 15..31 all equal the sign bit */
    if ((x >> 15) == signMask)
        return (short)x;

    /* saturate: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ 0x7fff);
}
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       input value
 * returns: |x|; for the most negative int, whose magnitude is not
 *          representable, the input value itself comes back
 *
 * The xor/subtract is carried out on unsigned values so that the
 * most-negative input wraps instead of overflowing a signed int, which is
 * undefined behaviour.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift) */
    unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* two's-complement negate when mask is all ones, identity when it is 0 */
    return (int)(((unsigned int)x ^ mask) - mask);
}
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       input, treated as a 32-bit pattern (the sign bit counts as bit 31)
 * returns: number of zero bits above the highest set bit, in [0, 32];
 *          32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (bits == 0)
        return 32;

    /* count leading zeros with binary search (the original notes that the
     * function should come to 17 ARM instructions in total) */
    numZeros = 1;
    if (!(bits >> 16)) { numZeros += 16; bits <<= 16; }
    if (!(bits >> 24)) { numZeros += 8;  bits <<= 8;  }
    if (!(bits >> 28)) { numZeros += 4;  bits <<= 4;  }
    if (!(bits >> 30)) { numZeros += 2;  bits <<= 2;  }

    /* the count started at 1; take it back if the top bit is actually set */
    numZeros -= (int)(bits >> 31);
    return numZeros;
}
- /* implemented in asmfunc.s */
- #ifdef __cplusplus
- extern "C" {
- #endif
- typedef __int64 Word64; /* 64-bit signed integer via the __int64 keyword */
- typedef union _U64 { /* views one Word64 either whole (w64) or as two 32-bit halves (r) */
- Word64 w64; /* the full 64-bit value */
- struct {
- /* ARM WinCE = little endian */
- unsigned int lo32; /* declared first, so it overlays the low-order word on this little-endian target */
- signed int hi32; /* declared second: the high-order word, which carries the sign */
- } r;
- } U64;
- /* manual name mangling for just this platform (must match labels in .s file) */
- #define MULSHIFT32 raac_MULSHIFT32 /* every later use of MULSHIFT32, including the prototype below, resolves to the raac_-prefixed symbol */
- #define MADD64 raac_MADD64 /* same renaming for the 64-bit multiply-accumulate */
- int MULSHIFT32(int x, int y); /* prototype only: the body lives in asmfunc.s; per the file header it returns the top 32 bits of the signed 64-bit product x*y */
- Word64 MADD64(Word64 sum64, int x, int y); /* prototype only: the body lives in asmfunc.s; per the file header it computes sum64 += (x*y) */
- #ifdef __cplusplus
- }
- #endif
- /* toolchain: ARM ADS or RealView
- * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
- */
- #elif defined (XXX__arm) && defined (__ARMCC_VERSION)
- static __inline int MULSHIFT32(int x, int y) /* ARM ADS/RealView: signed multiply of the 32-bit integers x and y, yielding the top 32 bits of the 64-bit product */
- {
- /* rules for smull RdLo, RdHi, Rm, Rs:
- * RdHi != Rm
- * RdLo != Rm
- * RdHi != RdLo
- */
- int zlow; /* takes the RdLo (low word) result; never read afterwards */
- __asm {
- smull zlow,y,x,y
- }
- return y; /* y is used as both RdHi and Rs, so after the multiply it holds the high word */
- }
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short with saturation.
 *
 * x:       value to convert (int is assumed to be 32 bits wide)
 * returns: x when it already lies in [-32768, 32767], otherwise the nearest
 *          bound (32767 on positive overflow, -32768 on negative overflow)
 */
static __inline short CLIPTOSHORT(int x)
{
    /* -1 when x is negative, 0 otherwise (relies on arithmetic right shift) */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 15..31 all equal the sign bit */
    if ((x >> 15) == signMask)
        return (short)x;

    /* saturate: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ 0x7fff);
}
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       input value
 * returns: |x|; for the most negative int, whose magnitude is not
 *          representable, the input value itself comes back
 *
 * The xor/subtract is carried out on unsigned values so that the
 * most-negative input wraps instead of overflowing a signed int, which is
 * undefined behaviour.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift) */
    unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* two's-complement negate when mask is all ones, identity when it is 0 */
    return (int)(((unsigned int)x ^ mask) - mask);
}
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       input, treated as a 32-bit pattern (the sign bit counts as bit 31)
 * returns: number of zero bits above the highest set bit, in [0, 32];
 *          32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (bits == 0)
        return 32;

    /* count leading zeros with binary search (function should be 17 ARM instructions total) */
    numZeros = 1;
    if (!(bits >> 16)) { numZeros += 16; bits <<= 16; }
    if (!(bits >> 24)) { numZeros += 8;  bits <<= 8;  }
    if (!(bits >> 28)) { numZeros += 4;  bits <<= 4;  }
    if (!(bits >> 30)) { numZeros += 2;  bits <<= 2;  }

    /* the count started at 1; take it back if the top bit is actually set */
    numZeros -= (int)(bits >> 31);
    return numZeros;

    /* ARM code would look like this, but do NOT use inline asm in ADS for this,
       because you can't safely use the status register flags intermixed with C code

        __asm {
            mov     numZeros, #1
            tst     x, 0xffff0000
            addeq   numZeros, numZeros, #16
            moveq   x, x, lsl #16
            tst     x, 0xff000000
            addeq   numZeros, numZeros, #8
            moveq   x, x, lsl #8
            tst     x, 0xf0000000
            addeq   numZeros, numZeros, #4
            moveq   x, x, lsl #4
            tst     x, 0xc0000000
            addeq   numZeros, numZeros, #2
            moveq   x, x, lsl #2
            sub     numZeros, numZeros, x, lsr #31
        }
    */
    /* reference (for non-zero x):
        numZeros = 0;
        while (!(x & 0x80000000)) {
            numZeros++;
            x <<= 1;
        }
    */
}
- typedef __int64 Word64; /* 64-bit signed integer via the __int64 keyword */
- typedef union _U64 { /* views one Word64 either whole (w64) or as two 32-bit halves (r) */
- Word64 w64; /* the full 64-bit value */
- struct {
- /* ARM ADS = little endian */
- unsigned int lo32; /* declared first, so it overlays the low-order word on this little-endian target */
- signed int hi32; /* declared second: the high-order word, which carries the sign */
- } r;
- } U64;
- static __inline Word64 MADD64(Word64 sum64, int x, int y) /* ARM ADS/RealView: 64-bit multiply-accumulate, returns sum64 + (x * y) */
- {
- U64 u; /* gives the asm separate access to the two 32-bit halves of the accumulator */
- u.w64 = sum64; /* start from the caller's running sum */
-
- __asm {
- smlal u.r.lo32, u.r.hi32, x, y
- }
- return u.w64; /* presumably SMLAL has added the signed 64-bit product x*y into hi32:lo32 - matches the file header's "sum64 += (x*y)" */
- }
- /* toolchain: ARM gcc
- * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
- */
- #elif defined(__GNUC__) && defined(XXXX__arm__)
- static inline int MULSHIFT32(int x, int y) /* ARM gcc: signed multiply of the 32-bit integers x and y, yielding the top 32 bits of the 64-bit product */
- { /* NOTE(review): plain `asm` is only a keyword in GNU dialects; a strict -std=c99/c11 build would need __asm__ - confirm the build flags */
- int zlow; /* output operand %0: low word of the product, never read afterwards */
- asm ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y) : "cc"); /* %1 is y as the high-word output; the "1" input constraint places the incoming y in that same register; "=&r" marks zlow early-clobber so it is kept apart from the inputs */
- return y; /* y now holds the high 32 bits of x*y */
- }
- /*
- static inline short CLIPTOSHORT(int x)
- {
- int sign;
- // clip to [-32768, 32767] //
- sign = x >> 31;
- if (sign != (x >> 15))
- x = sign ^ ((1 << 15) - 1);
- return (short)x;
- }
- */
- static inline short CLIPTOSHORT(int x) /* ARM gcc: convert a 32-bit integer to a 16-bit short, clipping to [-32768, 32767] (replaces the commented-out C version above) */
- {
- asm ("ssat %0, #16, %1" : "=r" (x) : "r" (x)); /* saturates x to a signed 16-bit range in a single instruction; NOTE(review): SSAT is presumably unavailable before ARMv6, while this section's banner says "ARM v.4 and above" - confirm the target */
- return x; /* implicitly narrowed from int to short; the value is already in range after saturation */
- }
/*
 * CLIP_2N(y, n) - saturate the int lvalue y in place to [-2^n, 2^n - 1].
 * Taken from coder.h; valid range of n is [1, 30].
 *
 * Expands to a braced block that declares a local named `sign`, evaluates y
 * up to three times and n up to twice: pass side-effect-free arguments and
 * never an expression that itself refers to `sign`.
 * TODO (FB): is there a better way?
 */
#define CLIP_2N(y, n) { \
    int sign = (y) >> 31; \
    if (((y) >> (n)) != sign) \
        (y) = ((1 << (n)) - 1) ^ sign; \
}
/*
 * CLIP_2N_SHIFT(y, n) - do y <<= n on the int lvalue y, clipping the result
 * to [-2^30, 2^30 - 1] (i.e. the output keeps one guard bit).
 * Taken from coder.h.
 *
 * The shift is carried out on the unsigned bit pattern: left-shifting a
 * negative signed int is undefined behaviour in C, whereas the unsigned
 * shift yields the same two's-complement result.
 *
 * Expands to a braced block that declares a local named `sign` and evaluates
 * its arguments more than once: pass side-effect-free arguments and never an
 * expression that itself refers to `sign`.
 * TODO (FB): is there a better way?
 */
#define CLIP_2N_SHIFT(y, n) { \
    int sign = (y) >> 31; \
    if (sign != (y) >> (30 - (n))) { \
        (y) = sign ^ (0x3fffffff); \
    } else { \
        (y) = (int)((unsigned int)(y) << (n)); \
    } \
}
/*
 * FASTABS(x) - absolute value of the int x. (FB)
 * Uses the compiler builtin so that no <stdlib.h> declaration of abs() is
 * needed; this section is only compiled by gcc, which always provides it.
 */
#define FASTABS(x) __builtin_abs(x)
/*
 * CLZ(x) - count leading zero bits of the 32-bit value x. (FB)
 * Returns 32 for x == 0, like the C implementations of CLZ used on the other
 * platforms in this file; __builtin_clz alone is undefined for a zero
 * argument. The helper keeps x evaluated exactly once.
 */
static inline int aac_clz32(unsigned int v)
{
    return v ? __builtin_clz(v) : 32;
}
#define CLZ(x) aac_clz32((unsigned int)(x))
- //Reverse byte order (16 bit) //FB
- static inline unsigned int REV16( unsigned int value) /* returns value after it has been passed through the ARM `rev16` instruction */
- {
- asm ("rev16 %0, %1" : "=r" (value) : "r" (value) ); /* presumably swaps the two bytes inside each 16-bit half of the word - verify against the ARM instruction reference */
- return(value); /* value was overwritten by the asm output operand */
- }
- //Reverse byte order (32 bit) //FB
- static inline unsigned int REV32( unsigned int value) /* returns value after it has been passed through the ARM `rev` instruction */
- {
- asm ("rev %0, %1" : "=r" (value) : "r" (value) ); /* presumably reverses the order of all four bytes of the word - verify against the ARM instruction reference */
- return(value); /* value was overwritten by the asm output operand */
- }
/* 64-bit signed integer used for multiply-accumulate sums */
typedef long long Word64;

/*
 * U64 - views one Word64 either whole (w64) or as two 32-bit halves (r).
 * The member order inside r is the little-endian layout: the low word is
 * stored first, the signed high word second.
 */
union _U64 {
    Word64 w64;             /* the full 64-bit value */
    struct {
        unsigned int lo32;  /* low-order 32 bits */
        signed int   hi32;  /* high-order 32 bits, carries the sign */
    } r;
};
typedef union _U64 U64;
- static inline Word64 MADD64(Word64 sum64, int x, int y) /* ARM gcc: 64-bit multiply-accumulate, returns sum64 + (x * y) */
- {
- U64 u; /* splits the accumulator into the two 32-bit halves handed to the asm */
- u.w64 = sum64; /* start from the caller's running sum */
- asm ("smlal %0,%1,%2,%3" : "+&r" (u.r.lo32), "+&r" (u.r.hi32) : "r" (x), "r" (y) : "cc"); /* "+" makes lo32/hi32 read-write operands (accumulate in place); "&" marks them early-clobber so they are kept out of the registers holding x and y */
- return u.w64; /* the two halves reassembled as one 64-bit value */
- }
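- /* toolchain: gcc on x86 (i386/amd64); also selected for any __APPLE__ build and for the non-gcc Solaris x86 compiler
- * target architecture: x86 (this section is plain C with no inline assembly)
- */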
- #elif defined(__APPLE__) || defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86))
/* 64-bit signed integer used for intermediate products and sums */
typedef long long Word64;

/*
 * MULSHIFT32 - signed multiply of two 32-bit integers.
 *
 * x, y:    factors
 * returns: the top 32 bits of the 64-bit product x * y
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    /* widen before multiplying so the full 64-bit product is kept */
    Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short with saturation.
 *
 * x:       value to convert (int is assumed to be 32 bits wide)
 * returns: x when it already lies in [-32768, 32767], otherwise the nearest
 *          bound (32767 on positive overflow, -32768 on negative overflow)
 */
static __inline short CLIPTOSHORT(int x)
{
    /* -1 when x is negative, 0 otherwise (relies on arithmetic right shift) */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 15..31 all equal the sign bit */
    if ((x >> 15) == signMask)
        return (short)x;

    /* saturate: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ 0x7fff);
}
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       input value
 * returns: |x|; for the most negative int, whose magnitude is not
 *          representable, the input value itself comes back
 *
 * The xor/subtract is carried out on unsigned values so that the
 * most-negative input wraps instead of overflowing a signed int, which is
 * undefined behaviour.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift) */
    unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* two's-complement negate when mask is all ones, identity when it is 0 */
    return (int)(((unsigned int)x ^ mask) - mask);
}
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       input, treated as a 32-bit pattern (the sign bit counts as bit 31)
 * returns: number of zero bits above the highest set bit, in [0, 32];
 *          32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (bits == 0)
        return 32;

    /* binary search: whenever the top half of the remaining window is empty,
     * count it and slide the lower half up into its place */
    numZeros = 1;
    if (!(bits >> 16)) { numZeros += 16; bits <<= 16; }
    if (!(bits >> 24)) { numZeros += 8;  bits <<= 8;  }
    if (!(bits >> 28)) { numZeros += 4;  bits <<= 4;  }
    if (!(bits >> 30)) { numZeros += 2;  bits <<= 2;  }

    /* the count started at 1; take it back if the top bit is actually set */
    numZeros -= (int)(bits >> 31);
    return numZeros;
}
- typedef union _U64 {
- Word64 w64;
- struct {
- /* x86 = little endian */
- unsigned int lo32;
- signed int hi32;
- } r;
- } U64;
- static __inline Word64 MADD64(Word64 sum64, int x, int y)
- {
- sum64 += (Word64)x * (Word64)y;
- return sum64;
- }
- #elif defined(ESP_PLATFORM) || defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__)) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86))
/* 64-bit signed integer used for intermediate products and sums */
typedef long long Word64;

/*
 * MULSHIFT32 - signed multiply of two 32-bit integers.
 *
 * x, y:    factors
 * returns: the top 32 bits of the 64-bit product x * y
 */
static __inline__ int MULSHIFT32(int x, int y)
{
    /* widen before multiplying so the full 64-bit product is kept */
    Word64 product = (Word64)x * (Word64)y;

    return (int)(product >> 32);
}
/*
 * CLIPTOSHORT - convert a 32-bit integer to a 16-bit short with saturation.
 *
 * x:       value to convert (int is assumed to be 32 bits wide)
 * returns: x when it already lies in [-32768, 32767], otherwise the nearest
 *          bound (32767 on positive overflow, -32768 on negative overflow)
 */
static __inline short CLIPTOSHORT(int x)
{
    /* -1 when x is negative, 0 otherwise (relies on arithmetic right shift) */
    const int signMask = x >> 31;

    /* x fits in 16 bits exactly when bits 15..31 all equal the sign bit */
    if ((x >> 15) == signMask)
        return (short)x;

    /* saturate: 0 ^ 0x7fff = 32767, -1 ^ 0x7fff = -32768 */
    return (short)(signMask ^ 0x7fff);
}
/*
 * FASTABS - branchless absolute value of a signed integer.
 *
 * x:       input value
 * returns: |x|; for the most negative int, whose magnitude is not
 *          representable, the input value itself comes back
 *
 * The xor/subtract is carried out on unsigned values so that the
 * most-negative input wraps instead of overflowing a signed int, which is
 * undefined behaviour.
 */
static __inline int FASTABS(int x)
{
    /* all ones when x is negative, zero otherwise (arithmetic right shift) */
    unsigned int mask = (unsigned int)(x >> (sizeof(int) * 8 - 1));

    /* two's-complement negate when mask is all ones, identity when it is 0 */
    return (int)(((unsigned int)x ^ mask) - mask);
}
/*
 * CLZ - count leading zero bits of a 32-bit value.
 *
 * x:       input, treated as a 32-bit pattern (the sign bit counts as bit 31)
 * returns: number of zero bits above the highest set bit, in [0, 32];
 *          32 when x is 0
 *
 * The search runs on an unsigned copy of x: left-shifting a signed int so
 * that set bits reach or pass the sign bit is undefined behaviour.
 */
static __inline int CLZ(int x)
{
    unsigned int bits = (unsigned int)x;
    int numZeros;

    if (bits == 0)
        return 32;

    /* binary search: whenever the top half of the remaining window is empty,
     * count it and slide the lower half up into its place */
    numZeros = 1;
    if (!(bits >> 16)) { numZeros += 16; bits <<= 16; }
    if (!(bits >> 24)) { numZeros += 8;  bits <<= 8;  }
    if (!(bits >> 28)) { numZeros += 4;  bits <<= 4;  }
    if (!(bits >> 30)) { numZeros += 2;  bits <<= 2;  }

    /* the count started at 1; take it back if the top bit is actually set */
    numZeros -= (int)(bits >> 31);
    return numZeros;
}
- typedef union _U64 {
- Word64 w64;
- struct {
- #ifdef __XTENSA__
- unsigned int lo32;
- signed int hi32;
- #else
- /* PowerPC = big endian */
- signed int hi32;
- unsigned int lo32;
- #endif
- } r;
- } U64;
- static __inline Word64 MADD64(Word64 sum64, int x, int y)
- {
- sum64 += (Word64)x * (Word64)y;
- return sum64;
- }
/*
 * CLIP_2N(y, n) - saturate the int lvalue y in place to [-2^n, 2^n - 1].
 * Taken from coder.h; valid range of n is [1, 30].
 *
 * Expands to a braced block that declares a local named `sign`, evaluates y
 * up to three times and n up to twice: pass side-effect-free arguments and
 * never an expression that itself refers to `sign`.
 * TODO (FB): is there a better way?
 */
#define CLIP_2N(y, n) { \
    int sign = (y) >> 31; \
    if (((y) >> (n)) != sign) \
        (y) = ((1 << (n)) - 1) ^ sign; \
}
/*
 * CLIP_2N_SHIFT(y, n) - do y <<= n on the int lvalue y, clipping the result
 * to [-2^30, 2^30 - 1] (i.e. the output keeps one guard bit).
 * Taken from coder.h.
 *
 * The shift is carried out on the unsigned bit pattern: left-shifting a
 * negative signed int is undefined behaviour in C, whereas the unsigned
 * shift yields the same two's-complement result.
 *
 * Expands to a braced block that declares a local named `sign` and evaluates
 * its arguments more than once: pass side-effect-free arguments and never an
 * expression that itself refers to `sign`.
 * TODO (FB): is there a better way?
 */
#define CLIP_2N_SHIFT(y, n) { \
    int sign = (y) >> 31; \
    if (sign != (y) >> (30 - (n))) { \
        (y) = sign ^ (0x3fffffff); \
    } else { \
        (y) = (int)((unsigned int)(y) << (n)); \
    } \
}
- //#define FASTABS(x) abs(x) //FB
- //#define CLZ(x) __builtin_clz(x) //FB
- #else
- #error Unsupported platform in assembly.h
- #endif /* platforms */
- #ifndef CLIP_2N
/*
 * CLIP_2N(y, n) - fallback definition for platforms that did not supply one:
 * saturate the int lvalue y in place to [-2^n, 2^n - 1].
 *
 * Expands to a braced block that declares a local named `sign`, evaluates y
 * up to three times and n up to twice: pass side-effect-free arguments and
 * never an expression that itself refers to `sign`.
 */
#define CLIP_2N(y, n) { \
    int sign = (y) >> 31; \
    if (((y) >> (n)) != sign) \
        (y) = ((1 << (n)) - 1) ^ sign; \
}
- #endif
- #ifndef CLIP_2N_SHIFT
/*
 * CLIP_2N_SHIFT(y, n) - fallback definition for platforms that did not
 * supply one: do y <<= n on the int lvalue y, clipping the result to
 * [-2^30, 2^30 - 1] (i.e. the output keeps one guard bit).
 * Taken from coder.h.
 *
 * The shift is carried out on the unsigned bit pattern: left-shifting a
 * negative signed int is undefined behaviour in C, whereas the unsigned
 * shift yields the same two's-complement result.
 *
 * Expands to a braced block that declares a local named `sign` and evaluates
 * its arguments more than once: pass side-effect-free arguments and never an
 * expression that itself refers to `sign`.
 * TODO (FB): is there a better way?
 */
#define CLIP_2N_SHIFT(y, n) { \
    int sign = (y) >> 31; \
    if (sign != (y) >> (30 - (n))) { \
        (y) = sign ^ (0x3fffffff); \
    } else { \
        (y) = (int)((unsigned int)(y) << (n)); \
    } \
}
- #endif
- #endif /* _ASSEMBLY_H */
|