/* ***** BEGIN LICENSE BLOCK *****
 * Source last modified: $Id: assembly.h,v 1.7 2005/11/10 00:04:40 margotm Exp $
 *
 * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.
 *
 * The contents of this file, and the files included with this file,
 * are subject to the current version of the RealNetworks Public
 * Source License (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the current version of the RealNetworks Community
 * Source License (the "RCSL") available at
 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
 * will apply. You may also obtain the license terms directly from
 * RealNetworks.  You may not use this file except in compliance with
 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
 * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
 * the rights, obligations and limitations governing use of the
 * contents of the file.
 *
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the
 * portions it created.
 *
 * This file, and the files included with this file, is distributed
 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
 * ENJOYMENT OR NON-INFRINGEMENT.
 *
 * Technology Compatibility Kit Test Suite(s) Location:
 *    http://www.helixcommunity.org/content/tck
 *
 * Contributor(s):
 *
 * ***** END LICENSE BLOCK ***** */

/**************************************************************************************
 * Fixed-point HE-AAC decoder
 * Jon Recker (jrecker@real.com)
 * February 2005
 *
 * assembly.h - inline assembly language functions and prototypes
 *
 * MULSHIFT32(x, y)         signed multiply of two 32-bit integers (x and y),
 *                            returns top 32-bits of 64-bit result
 * CLIPTOSHORT(x)           convert 32-bit integer to 16-bit short,
 *                            clipping to [-32768, 32767]
 * FASTABS(x)               branchless absolute value of signed integer x
 * CLZ(x)                   count leading zeros on signed integer x
 *                            (returns 32 for x == 0)
 * MADD64(sum64, x, y)      64-bit multiply accumulate: sum64 += (x*y)
 * CLIP_2N(y, n)            clip y in place to [-2^n, 2^n - 1], n in [1, 30]
 * CLIP_2N_SHIFT(y, n)      do y <<= n in place, clipping to [-2^30, 2^30 - 1]
 *
 * Exactly one platform section below is selected by the preprocessor; each one
 * provides the same set of primitives plus the Word64 / U64 types.
 **************************************************************************************/

#ifndef _ASSEMBLY_H
#define _ASSEMBLY_H

/* toolchain:           MSFT Visual C++
 * target architecture: x86
 */
#if (defined (_WIN32) && !defined (_WIN32_WCE)) || (defined (__WINS__) && defined (_SYMBIAN)) || (defined (WINCE_EMULATOR)) || (defined (_OPENWAVE_SIMULATOR))

#pragma warning( disable : 4035 )	/* complains about inline asm not returning a value */

/* Returns the top 32 bits of the signed 64-bit product x * y.
 * There is no return statement: the result is left in eax by the asm block
 * (which is why warning 4035 is disabled above).
 */
static __inline int MULSHIFT32(int x, int y)
{
	__asm {
		mov		eax, x
		imul	y
		mov		eax, edx
	}
}

/* Converts x to a 16-bit short, clipping to [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
	int sign;

	/* clip to [-32768, 32767] */
	sign = x >> 31;
	if (sign != (x >> 15))
		x = sign ^ ((1 << 15) - 1);

	return (short)x;
}

/* Branchless absolute value: sign is 0 or -1, so (x ^ sign) - sign negates x when negative. */
static __inline int FASTABS(int x)
{
	int sign;

	sign = x >> (sizeof(int) * 8 - 1);
	x ^= sign;
	x -= sign;

	return x;
}

/* Counts the leading zero bits of x (treated as a 32-bit pattern); returns 32 for x == 0. */
static __inline int CLZ(int x)
{
	/* shift an unsigned copy: left-shifting bits into the sign bit of an int is undefined */
	unsigned int bits = (unsigned int)x;
	int numZeros;

	if (!bits)
		return 32;

	/* count leading zeros with binary search */
	numZeros = 1;
	if (!(bits >> 16))	{ numZeros += 16; bits <<= 16; }
	if (!(bits >> 24))	{ numZeros +=  8; bits <<=  8; }
	if (!(bits >> 28))	{ numZeros +=  4; bits <<=  4; }
	if (!(bits >> 30))	{ numZeros +=  2; bits <<=  2; }

	numZeros -= (int)(bits >> 31);

	return numZeros;
}

#ifdef __CW32__
typedef long long Word64;
#else
typedef __int64 Word64;
#endif

/* 64-bit value with access to its 32-bit halves */
typedef union _U64 {
	Word64 w64;
	struct {
		/* x86 = little endian */
		unsigned int lo32;
		signed int   hi32;
	} r;
} U64;

/* 64-bit multiply accumulate: returns sum64 + (x * y).
 * returns 64-bit value in [edx:eax]
 */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
#if (defined (_SYMBIAN_61_) || defined (_SYMBIAN_70_)) && defined (__WINS__) && !defined (__CW32__)
/* Workaround for the Symbian emulator because of non existing longlong.lib and
 * hence __allmul not defined. */
	__asm {
		mov     eax, x
		imul    y
		add     dword ptr sum64, eax
		adc     dword ptr sum64 + 4, edx
	}
#else
	sum64 += (Word64)x * (Word64)y;
#endif

	return sum64;
}

/* toolchain:           MSFT Embedded Visual C++
 * target architecture: ARM v.4 and above (require 'M' type processor for 32x32->64 multiplier)
 */
#elif defined (_WIN32) && defined (_WIN32_WCE) && defined (ARM)

/* Converts x to a 16-bit short, clipping to [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
	int sign;

	/* clip to [-32768, 32767] */
	sign = x >> 31;
	if (sign != (x >> 15))
		x = sign ^ ((1 << 15) - 1);

	return (short)x;
}

/* Branchless absolute value: sign is 0 or -1, so (x ^ sign) - sign negates x when negative. */
static __inline int FASTABS(int x)
{
	int sign;

	sign = x >> (sizeof(int) * 8 - 1);
	x ^= sign;
	x -= sign;

	return x;
}

/* Counts the leading zero bits of x (treated as a 32-bit pattern); returns 32 for x == 0. */
static __inline int CLZ(int x)
{
	/* shift an unsigned copy: left-shifting bits into the sign bit of an int is undefined */
	unsigned int bits = (unsigned int)x;
	int numZeros;

	if (!bits)
		return 32;

	/* count leading zeros with binary search (function should be 17 ARM instructions total) */
	numZeros = 1;
	if (!(bits >> 16))	{ numZeros += 16; bits <<= 16; }
	if (!(bits >> 24))	{ numZeros +=  8; bits <<=  8; }
	if (!(bits >> 28))	{ numZeros +=  4; bits <<=  4; }
	if (!(bits >> 30))	{ numZeros +=  2; bits <<=  2; }

	numZeros -= (int)(bits >> 31);

	return numZeros;
}

/* implemented in asmfunc.s */
#ifdef __cplusplus
extern "C" {
#endif

typedef __int64 Word64;

/* 64-bit value with access to its 32-bit halves */
typedef union _U64 {
	Word64 w64;
	struct {
		/* ARM WinCE = little endian */
		unsigned int lo32;
		signed int   hi32;
	} r;
} U64;

/* manual name mangling for just this platform (must match labels in .s file) */
#define MULSHIFT32	raac_MULSHIFT32
#define MADD64		raac_MADD64

int MULSHIFT32(int x, int y);
Word64 MADD64(Word64 sum64, int x, int y);

#ifdef __cplusplus
}
#endif

/* toolchain:           ARM ADS or RealView
 * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
 */
#elif defined (XXX__arm) && defined (__ARMCC_VERSION)

/* Returns the top 32 bits of the signed 64-bit product x * y. */
static __inline int MULSHIFT32(int x, int y)
{
	/* rules for smull RdLo, RdHi, Rm, Rs:
	 *   RdHi != Rm
	 *   RdLo != Rm
	 *   RdHi != RdLo
	 */
	int zlow;
	__asm {
		smull zlow,y,x,y
	}

	return y;
}

/* Converts x to a 16-bit short, clipping to [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
	int sign;

	/* clip to [-32768, 32767] */
	sign = x >> 31;
	if (sign != (x >> 15))
		x = sign ^ ((1 << 15) - 1);

	return (short)x;
}

/* Branchless absolute value: sign is 0 or -1, so (x ^ sign) - sign negates x when negative. */
static __inline int FASTABS(int x)
{
	int sign;

	sign = x >> (sizeof(int) * 8 - 1);
	x ^= sign;
	x -= sign;

	return x;
}

/* Counts the leading zero bits of x (treated as a 32-bit pattern); returns 32 for x == 0. */
static __inline int CLZ(int x)
{
	/* shift an unsigned copy: left-shifting bits into the sign bit of an int is undefined */
	unsigned int bits = (unsigned int)x;
	int numZeros;

	if (!bits)
		return 32;

	/* count leading zeros with binary search (function should be 17 ARM instructions total) */
	numZeros = 1;
	if (!(bits >> 16))	{ numZeros += 16; bits <<= 16; }
	if (!(bits >> 24))	{ numZeros +=  8; bits <<=  8; }
	if (!(bits >> 28))	{ numZeros +=  4; bits <<=  4; }
	if (!(bits >> 30))	{ numZeros +=  2; bits <<=  2; }

	numZeros -= (int)(bits >> 31);

	return numZeros;

/* ARM code would look like this, but do NOT use inline asm in ADS for this,
   because you can't safely use the status register flags intermixed with C code

	__asm {
	    mov		numZeros, #1
		tst		x, 0xffff0000
		addeq	numZeros, numZeros, #16
		moveq	x, x, lsl #16
		tst		x, 0xff000000
		addeq	numZeros, numZeros, #8
		moveq	x, x, lsl #8
		tst		x, 0xf0000000
		addeq	numZeros, numZeros, #4
		moveq	x, x, lsl #4
		tst		x, 0xc0000000
		addeq	numZeros, numZeros, #2
		moveq	x, x, lsl #2
		sub		numZeros, numZeros, x, lsr #31
	}
*/
/* reference:
	numZeros = 0;
	while (!(x & 0x80000000)) {
		numZeros++;
		x <<= 1;
	}
*/
}

typedef __int64 Word64;

/* 64-bit value with access to its 32-bit halves */
typedef union _U64 {
	Word64 w64;
	struct {
		/* ARM ADS = little endian */
		unsigned int lo32;
		signed int   hi32;
	} r;
} U64;

/* 64-bit multiply accumulate: returns sum64 + (x * y), using smlal on the two halves. */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
	U64 u;
	u.w64 = sum64;

	__asm {
		smlal u.r.lo32, u.r.hi32, x, y
	}

	return u.w64;
}

/* toolchain:           ARM gcc
 * target architecture: ARM v.4 and above (requires 'M' type processor for 32x32->64 multiplier)
 */
#elif defined(__GNUC__) && defined(XXXX__arm__)

/* Returns the top 32 bits of the signed 64-bit product x * y. */
static inline int MULSHIFT32(int x, int y)
{
	int zlow;
	asm ("smull %0,%1,%2,%3" : "=&r" (zlow), "=r" (y) : "r" (x), "1" (y) : "cc");
	return y;
}

/* portable version, kept for reference:
static inline short CLIPTOSHORT(int x)
{
	int sign;

	// clip to [-32768, 32767] //
	sign = x >> 31;
	if (sign != (x >> 15))
		x = sign ^ ((1 << 15) - 1);

	return (short)x;
}
*/

/* Converts x to a 16-bit short, clipping to [-32768, 32767] (signed saturate to 16 bits). */
static inline short CLIPTOSHORT(int x)
{
	asm ("ssat %0, #16, %1" : "=r" (x) : "r" (x));
	return x;
}

/* From coder.h, ORIGINAL:
clip to [-2^n, 2^n-1], valid range of n = [1, 30]
//TODO (FB) Is there a better way ?
*/
#define CLIP_2N(y, n) { \
	int sign = (y) >> 31;  \
	if (sign != (y) >> (n))  { \
		(y) = sign ^ ((1 << (n)) - 1); \
	} \
}

/* From coder.h, ORIGINAL:
 do y <<= n, clipping to range [-2^30, 2^30 - 1] (i.e. output has one guard bit)
*/
//TODO (FB) Is there a better way ?
#define CLIP_2N_SHIFT(y, n) {                   \
        int sign = (y) >> 31;                   \
        if (sign != (y) >> (30 - (n)))  {       \
            (y) = sign ^ (0x3fffffff);          \
        } else {                                \
            (y) = (y) << (n);                   \
        }                                       \
    }

/* NOTE(review): abs() is declared in <stdlib.h>, which this header does not include;
 * the including file has to provide it. */
#define FASTABS(x) abs(x) //FB

/* Counts the leading zero bits of x; returns 32 for x == 0.
 * __builtin_clz() has an undefined result for 0, so that case is handled here to
 * match the portable CLZ() implementations of the other platform sections. //FB
 */
static inline int CLZ(int x)
{
	return x ? __builtin_clz((unsigned int)x) : 32;
}

//Reverse byte order (16 bit) //FB
static inline unsigned int REV16( unsigned int value)
{
	asm ("rev16 %0, %1" : "=r" (value) : "r" (value) );
	return(value);
}

//Reverse byte order (32 bit) //FB
static inline unsigned int REV32( unsigned int value)
{
	asm ("rev %0, %1" : "=r" (value) : "r" (value) );
	return(value);
}

typedef long long Word64;

/* 64-bit value with access to its 32-bit halves */
typedef union _U64 {
	Word64 w64;
	struct {
		/* little endian */
		unsigned int lo32;
		signed int   hi32;
	} r;
} U64;

/* 64-bit multiply accumulate: returns sum64 + (x * y), using smlal on the two halves. */
static inline Word64 MADD64(Word64 sum64, int x, int y)
{
	U64 u;
	u.w64 = sum64;
	asm ("smlal %0,%1,%2,%3" : "+&r" (u.r.lo32), "+&r" (u.r.hi32) : "r" (x), "r" (y) : "cc");
	return u.w64;
}

/* toolchain:           x86 gcc
 * target architecture: x86
 */
#elif defined(__APPLE__) || (defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))) || (defined (_SOLARIS) && !defined (__GNUC__) && defined(_SOLARISX86))

typedef long long Word64;

/* Returns the top 32 bits of the signed 64-bit product x * y. */
static __inline__ int MULSHIFT32(int x, int y)
{
	int z;

	z = (Word64)x * (Word64)y >> 32;

	return z;
}

/* Converts x to a 16-bit short, clipping to [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
	int sign;

	/* clip to [-32768, 32767] */
	sign = x >> 31;
	if (sign != (x >> 15))
		x = sign ^ ((1 << 15) - 1);

	return (short)x;
}

/* Branchless absolute value: sign is 0 or -1, so (x ^ sign) - sign negates x when negative. */
static __inline int FASTABS(int x)
{
	int sign;

	sign = x >> (sizeof(int) * 8 - 1);
	x ^= sign;
	x -= sign;

	return x;
}

/* Counts the leading zero bits of x (treated as a 32-bit pattern); returns 32 for x == 0. */
static __inline int CLZ(int x)
{
	/* shift an unsigned copy: left-shifting bits into the sign bit of an int is undefined */
	unsigned int bits = (unsigned int)x;
	int numZeros;

	if (!bits)
		return 32;

	/* count leading zeros with binary search */
	numZeros = 1;
	if (!(bits >> 16))	{ numZeros += 16; bits <<= 16; }
	if (!(bits >> 24))	{ numZeros +=  8; bits <<=  8; }
	if (!(bits >> 28))	{ numZeros +=  4; bits <<=  4; }
	if (!(bits >> 30))	{ numZeros +=  2; bits <<=  2; }

	numZeros -= (int)(bits >> 31);

	return numZeros;
}

/* 64-bit value with access to its 32-bit halves */
typedef union _U64 {
	Word64 w64;
	struct {
		/* x86 = little endian */
		unsigned int lo32;
		signed int   hi32;
	} r;
} U64;

/* 64-bit multiply accumulate: returns sum64 + (x * y). */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
	sum64 += (Word64)x * (Word64)y;

	return sum64;
}

/* toolchain:           gcc (ESP platform, PowerPC) or Solaris native compiler (non-x86)
 * target architecture: any; all primitives in this section are plain C
 */
#elif defined(ESP_PLATFORM) || (defined(__GNUC__) && (defined(__powerpc__) || defined(__POWERPC__))) || (defined (_SOLARIS) && !defined (__GNUC__) && !defined (_SOLARISX86))

typedef long long Word64;

/* Returns the top 32 bits of the signed 64-bit product x * y. */
static __inline__ int MULSHIFT32(int x, int y)
{
	int z;

	z = (Word64)x * (Word64)y >> 32;

	return z;
}

/* Converts x to a 16-bit short, clipping to [-32768, 32767]. */
static __inline short CLIPTOSHORT(int x)
{
	int sign;

	/* clip to [-32768, 32767] */
	sign = x >> 31;
	if (sign != (x >> 15))
		x = sign ^ ((1 << 15) - 1);

	return (short)x;
}

/* Branchless absolute value: sign is 0 or -1, so (x ^ sign) - sign negates x when negative. */
static __inline int FASTABS(int x)
{
	int sign;

	sign = x >> (sizeof(int) * 8 - 1);
	x ^= sign;
	x -= sign;

	return x;
}

/* Counts the leading zero bits of x (treated as a 32-bit pattern); returns 32 for x == 0. */
static __inline int CLZ(int x)
{
	/* shift an unsigned copy: left-shifting bits into the sign bit of an int is undefined */
	unsigned int bits = (unsigned int)x;
	int numZeros;

	if (!bits)
		return 32;

	/* count leading zeros with binary search */
	numZeros = 1;
	if (!(bits >> 16))	{ numZeros += 16; bits <<= 16; }
	if (!(bits >> 24))	{ numZeros +=  8; bits <<=  8; }
	if (!(bits >> 28))	{ numZeros +=  4; bits <<=  4; }
	if (!(bits >> 30))	{ numZeros +=  2; bits <<=  2; }

	numZeros -= (int)(bits >> 31);

	return numZeros;
}

/* 64-bit value with access to its 32-bit halves.
 * The order of the halves has to follow the byte order of the target. Besides
 * the explicit __XTENSA__ case, the compiler's own __BYTE_ORDER__ is honoured so
 * that every little-endian target reaching this section gets lo32 first; targets
 * whose compiler does not define __BYTE_ORDER__ keep the big-endian layout.
 */
typedef union _U64 {
	Word64 w64;
	struct {
#if defined(__XTENSA__) || (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
		/* little endian */
		unsigned int lo32;
		signed int   hi32;
#else
		/* PowerPC = big endian */
		signed int   hi32;
		unsigned int lo32;
#endif
	} r;
} U64;

/* 64-bit multiply accumulate: returns sum64 + (x * y). */
static __inline Word64 MADD64(Word64 sum64, int x, int y)
{
	sum64 += (Word64)x * (Word64)y;

	return sum64;
}

/* From coder.h, ORIGINAL:
clip to [-2^n, 2^n-1], valid range of n = [1, 30]
//TODO (FB) Is there a better way ?
*/
#define CLIP_2N(y, n) { \
	int sign = (y) >> 31;  \
	if (sign != (y) >> (n))  { \
		(y) = sign ^ ((1 << (n)) - 1); \
	} \
}

/* From coder.h, ORIGINAL:
 do y <<= n, clipping to range [-2^30, 2^30 - 1] (i.e. output has one guard bit)
*/
//TODO (FB) Is there a better way ?
#define CLIP_2N_SHIFT(y, n) {                   \
        int sign = (y) >> 31;                   \
        if (sign != (y) >> (30 - (n)))  {       \
            (y) = sign ^ (0x3fffffff);          \
        } else {                                \
            (y) = (y) << (n);                   \
        }                                       \
    }

//#define FASTABS(x) abs(x) //FB
//#define CLZ(x) __builtin_clz(x) //FB

#else

#error Unsupported platform in assembly.h

#endif	/* platforms */

/* Portable defaults for the platform sections that did not define the clip macros. */
#ifndef CLIP_2N
/* clip y in place to [-2^n, 2^n-1], valid range of n = [1, 30] */
#define CLIP_2N(y, n) { \
        int sign = (y) >> 31;  \
        if (sign != (y) >> (n))  { \
                (y) = sign ^ ((1 << (n)) - 1); \
        } \
}
#endif

#ifndef CLIP_2N_SHIFT
/* From coder.h, ORIGINAL:
 do y <<= n, clipping to range [-2^30, 2^30 - 1] (i.e. output has one guard bit)
*/
//TODO (FB) Is there a better way ?
#define CLIP_2N_SHIFT(y, n) {                   \
        int sign = (y) >> 31;                   \
        if (sign != (y) >> (30 - (n)))  {       \
            (y) = sign ^ (0x3fffffff);          \
        } else {                                \
            (y) = (y) << (n);                   \
        }                                       \
    }
#endif

#endif /* _ASSEMBLY_H */