| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337 | 
							- /* ***** BEGIN LICENSE BLOCK *****  
 
-  * Source last modified: $Id: dct4.c,v 1.1 2005/02/26 01:47:34 jrecker Exp $ 
 
-  *   
 
-  * Portions Copyright (c) 1995-2005 RealNetworks, Inc. All Rights Reserved.  
 
-  *       
 
-  * The contents of this file, and the files included with this file, 
 
-  * are subject to the current version of the RealNetworks Public 
 
-  * Source License (the "RPSL") available at 
 
-  * http://www.helixcommunity.org/content/rpsl unless you have licensed 
 
-  * the file under the current version of the RealNetworks Community 
 
-  * Source License (the "RCSL") available at 
 
-  * http://www.helixcommunity.org/content/rcsl, in which case the RCSL 
 
-  * will apply. You may also obtain the license terms directly from 
 
-  * RealNetworks.  You may not use this file except in compliance with 
 
-  * the RPSL or, if you have a valid RCSL with RealNetworks applicable 
 
-  * to this file, the RCSL.  Please see the applicable RPSL or RCSL for 
 
-  * the rights, obligations and limitations governing use of the 
 
-  * contents of the file. 
 
-  *   
 
-  * This file is part of the Helix DNA Technology. RealNetworks is the 
 
-  * developer of the Original Code and owns the copyrights in the 
 
-  * portions it created. 
 
-  *   
 
-  * This file, and the files included with this file, is distributed 
 
-  * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY 
 
-  * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS 
 
-  * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES 
 
-  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET 
 
-  * ENJOYMENT OR NON-INFRINGEMENT. 
 
-  *  
 
-  * Technology Compatibility Kit Test Suite(s) Location:  
 
-  *    http://www.helixcommunity.org/content/tck  
 
-  *  
 
-  * Contributor(s):  
 
-  *   
 
-  * ***** END LICENSE BLOCK ***** */  
 
- /**************************************************************************************
 
-  * Fixed-point HE-AAC decoder
 
-  * Jon Recker (jrecker@real.com), Ken Cooke (kenc@real.com)
 
-  * February 2005
 
-  *
 
-  * dct4.c - optimized DCT-IV
 
-  **************************************************************************************/
 
- #include "coder.h"
 
- #include "assembly.h"
 
- static const int nmdctTab[NUM_IMDCT_SIZES] PROGMEM = {128, 1024};	/* transform length (nmdct, in samples) for each table index */
 
- static const int postSkip[NUM_IMDCT_SIZES] PROGMEM = {15, 1};	/* extra cos1sin1tab entries stepped over after each (cos+sin, sin) pair is read by the post-twiddle loops, per table index */
 
- /**************************************************************************************
 
-  * Function:    PreMultiply
 
-  *
 
-  * Description: pre-twiddle stage of DCT4
 
-  *
 
-  * Inputs:      table index (for transform size)
 
-  *              buffer of nmdct samples
 
-  *
 
-  * Outputs:     processed samples in same buffer
 
-  *
 
-  * Return:      none
 
-  *
 
-  * Notes:       minimum 1 GB in, 2 GB out, gains 5 (short) or 8 (long) frac bits
 
-  *              i.e. gains 2-7= -5 int bits (short) or 2-10 = -8 int bits (long)
 
-  *              normalization by -1/N is rolled into tables here (see trigtabs.c)
 
-  *              uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
 
-  **************************************************************************************/
 
- static void PreMultiply(int tabidx, int *zbuf1)
 
- {
 
- 	int i, nmdct, ar1, ai1, ar2, ai2, z1, z2;
 
- 	int t, cms2, cps2a, sin2a, cps2b, sin2b;
 
- 	int *zbuf2;
 
- 	const int *csptr;
 
- 	nmdct = nmdctTab[tabidx];		
 
- 	zbuf2 = zbuf1 + nmdct - 1;
 
- 	csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];
 
- 	/* whole thing should fit in registers - verify that compiler does this */
 
- 	for (i = nmdct >> 2; i != 0; i--) {
 
- 		/* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
 
- 		cps2a = *csptr++;
 
- 		sin2a = *csptr++;
 
- 		cps2b = *csptr++;
 
- 		sin2b = *csptr++;
 
- 		ar1 = *(zbuf1 + 0);
 
- 		ai2 = *(zbuf1 + 1);
 
- 		ai1 = *(zbuf2 + 0);
 
- 		ar2 = *(zbuf2 - 1);
 
- 		/* gain 2 ints bit from MULSHIFT32 by Q30, but drop 7 or 10 int bits from table scaling of 1/M
 
- 		 * max per-sample gain (ignoring implicit scaling) = MAX(sin(angle)+cos(angle)) = 1.414
 
- 		 * i.e. gain 1 GB since worst case is sin(angle) = cos(angle) = 0.707 (Q30), gain 2 from
 
- 		 *   extra sign bits, and eat one in adding
 
- 		 */
 
- 		t  = MULSHIFT32(sin2a, ar1 + ai1);
 
- 		z2 = MULSHIFT32(cps2a, ai1) - t;
 
- 		cms2 = cps2a - 2*sin2a;
 
- 		z1 = MULSHIFT32(cms2, ar1) + t;
 
- 		*zbuf1++ = z1;	/* cos*ar1 + sin*ai1 */
 
- 		*zbuf1++ = z2;	/* cos*ai1 - sin*ar1 */
 
- 		t  = MULSHIFT32(sin2b, ar2 + ai2);
 
- 		z2 = MULSHIFT32(cps2b, ai2) - t;
 
- 		cms2 = cps2b - 2*sin2b;
 
- 		z1 = MULSHIFT32(cms2, ar2) + t;
 
- 		*zbuf2-- = z2;	/* cos*ai2 - sin*ar2 */
 
- 		*zbuf2-- = z1;	/* cos*ar2 + sin*ai2 */
 
- 	}
 
- }
 
- /**************************************************************************************
 
-  * Function:    PostMultiply
 
-  *
 
-  * Description: post-twiddle stage of DCT4
 
-  *
 
-  * Inputs:      table index (for transform size)
 
-  *              buffer of nmdct samples
 
-  *
 
-  * Outputs:     processed samples in same buffer
 
-  *
 
-  * Return:      none
 
-  *
 
-  * Notes:       minimum 1 GB in, 2 GB out - gains 2 int bits
 
-  *              uses 3-mul, 3-add butterflies instead of 4-mul, 2-add
 
-  **************************************************************************************/
 
- static void PostMultiply(int tabidx, int *fft1)
 
- {
 
- 	int i, nmdct, ar1, ai1, ar2, ai2, skipFactor;
 
- 	int t, cms2, cps2, sin2;
 
- 	int *fft2;
 
- 	const int *csptr;
 
- 	nmdct = nmdctTab[tabidx];		
 
- 	csptr = cos1sin1tab;
 
- 	skipFactor = postSkip[tabidx];
 
- 	fft2 = fft1 + nmdct - 1;
 
- 	/* load coeffs for first pass
 
- 	 * cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin)
 
- 	 */
 
- 	cps2 = *csptr++;
 
- 	sin2 = *csptr;
 
- 	csptr += skipFactor;
 
- 	cms2 = cps2 - 2*sin2;
 
- 	for (i = nmdct >> 2; i != 0; i--) {
 
- 		ar1 = *(fft1 + 0);
 
- 		ai1 = *(fft1 + 1);
 
- 		ar2 = *(fft2 - 1);
 
- 		ai2 = *(fft2 + 0);
 
- 		/* gain 2 ints bit from MULSHIFT32 by Q30
 
- 		 * max per-sample gain = MAX(sin(angle)+cos(angle)) = 1.414
 
- 		 * i.e. gain 1 GB since worst case is sin(angle) = cos(angle) = 0.707 (Q30), gain 2 from
 
- 		 *   extra sign bits, and eat one in adding
 
- 		 */
 
- 		t = MULSHIFT32(sin2, ar1 + ai1);
 
- 		*fft2-- = t - MULSHIFT32(cps2, ai1);	/* sin*ar1 - cos*ai1 */
 
- 		*fft1++ = t + MULSHIFT32(cms2, ar1);	/* cos*ar1 + sin*ai1 */
 
- 		cps2 = *csptr++;
 
- 		sin2 = *csptr;
 
- 		csptr += skipFactor;
 
- 		ai2 = -ai2;
 
- 		t = MULSHIFT32(sin2, ar2 + ai2);
 
- 		*fft2-- = t - MULSHIFT32(cps2, ai2);	/* sin*ar1 - cos*ai1 */
 
- 		cms2 = cps2 - 2*sin2;
 
- 		*fft1++ = t + MULSHIFT32(cms2, ar2);	/* cos*ar1 + sin*ai1 */
 
- 	}
 
- }
 
- /**************************************************************************************
 
-  * Function:    PreMultiplyRescale
 
-  *
 
-  * Description: pre-twiddle stage of DCT4, with rescaling for extra guard bits
 
-  *
 
-  * Inputs:      table index (for transform size)
 
-  *              buffer of nmdct samples
 
-  *              number of guard bits to add to input before processing
 
-  *
 
-  * Outputs:     processed samples in same buffer
 
-  *
 
-  * Return:      none
 
-  *
 
-  * Notes:       see notes on PreMultiply(), above
 
-  **************************************************************************************/
 
-  /* __attribute__ ((section (".data"))) */ static void PreMultiplyRescale(int tabidx, int *zbuf1, int es)
 
- {
 
- 	int i, nmdct, ar1, ai1, ar2, ai2, z1, z2;
 
- 	int t, cms2, cps2a, sin2a, cps2b, sin2b;
 
- 	int *zbuf2;
 
- 	const int *csptr;
 
- 	nmdct = nmdctTab[tabidx];		
 
- 	zbuf2 = zbuf1 + nmdct - 1;
 
- 	csptr = cos4sin4tab + cos4sin4tabOffset[tabidx];
 
- 	/* whole thing should fit in registers - verify that compiler does this */
 
- 	for (i = nmdct >> 2; i != 0; i--) {
 
- 		/* cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin) */
 
- 		cps2a = *csptr++;	
 
- 		sin2a = *csptr++;
 
- 		cps2b = *csptr++;	
 
- 		sin2b = *csptr++;
 
- 		ar1 = *(zbuf1 + 0) >> es;
 
- 		ai1 = *(zbuf2 + 0) >> es;
 
- 		ai2 = *(zbuf1 + 1) >> es;
 
- 		t  = MULSHIFT32(sin2a, ar1 + ai1);
 
- 		z2 = MULSHIFT32(cps2a, ai1) - t;
 
- 		cms2 = cps2a - 2*sin2a;
 
- 		z1 = MULSHIFT32(cms2, ar1) + t;
 
- 		*zbuf1++ = z1;
 
- 		*zbuf1++ = z2;
 
- 		ar2 = *(zbuf2 - 1) >> es;	/* do here to free up register used for es */
 
- 		t  = MULSHIFT32(sin2b, ar2 + ai2);
 
- 		z2 = MULSHIFT32(cps2b, ai2) - t;
 
- 		cms2 = cps2b - 2*sin2b;
 
- 		z1 = MULSHIFT32(cms2, ar2) + t;
 
- 		*zbuf2-- = z2;
 
- 		*zbuf2-- = z1;
 
- 	}
 
- }
 
- /**************************************************************************************
 
-  * Function:    PostMultiplyRescale
 
-  *
 
-  * Description: post-twiddle stage of DCT4, with rescaling for extra guard bits
 
-  *
 
-  * Inputs:      table index (for transform size)
 
-  *              buffer of nmdct samples
 
-  *              number of guard bits to remove from output
 
-  *
 
-  * Outputs:     processed samples in same buffer
 
-  *
 
-  * Return:      none
 
-  *
 
-  * Notes:       clips output to [-2^30, 2^30 - 1], guaranteeing at least 1 guard bit
 
-  *              see notes on PostMultiply(), above
 
-  **************************************************************************************/
 
-  /* __attribute__ ((section (".data"))) */ static void PostMultiplyRescale(int tabidx, int *fft1, int es)
 
- {
 
- 	int i, nmdct, ar1, ai1, ar2, ai2, skipFactor, z;
 
- 	int t, cs2, sin2;
 
- 	int *fft2;
 
- 	const int *csptr;
 
- 	nmdct = nmdctTab[tabidx];		
 
- 	csptr = cos1sin1tab;
 
- 	skipFactor = postSkip[tabidx];
 
- 	fft2 = fft1 + nmdct - 1;
 
- 	/* load coeffs for first pass
 
- 	 * cps2 = (cos+sin), sin2 = sin, cms2 = (cos-sin)
 
- 	 */
 
- 	cs2 = *csptr++;
 
- 	sin2 = *csptr;
 
- 	csptr += skipFactor;
 
- 	for (i = nmdct >> 2; i != 0; i--) {
 
- 		ar1 = *(fft1 + 0);
 
- 		ai1 = *(fft1 + 1);
 
- 		ai2 = *(fft2 + 0);
 
- 		t = MULSHIFT32(sin2, ar1 + ai1);
 
- 		z = t - MULSHIFT32(cs2, ai1);	
 
- 		CLIP_2N_SHIFT(z, es);	 
 
- 		*fft2-- = z;
 
- 		cs2 -= 2*sin2;
 
- 		z = t + MULSHIFT32(cs2, ar1);	
 
- 		CLIP_2N_SHIFT(z, es);	 
 
- 		*fft1++ = z;
 
- 		cs2 = *csptr++;
 
- 		sin2 = *csptr;
 
- 		csptr += skipFactor;
 
- 		ar2 = *fft2;
 
- 		ai2 = -ai2;
 
- 		t = MULSHIFT32(sin2, ar2 + ai2);
 
- 		z = t - MULSHIFT32(cs2, ai2);	
 
- 		CLIP_2N_SHIFT(z, es);	 
 
- 		*fft2-- = z;
 
- 		cs2 -= 2*sin2;
 
- 		z = t + MULSHIFT32(cs2, ar2);	
 
- 		CLIP_2N_SHIFT(z, es);	 
 
- 		*fft1++ = z;
 
- 		cs2 += 2*sin2;
 
- 	}
 
- }
 
- /**************************************************************************************
 
-  * Function:    DCT4
 
-  *
 
-  * Description: type-IV DCT
 
-  *
 
-  * Inputs:      table index (for transform size)
 
-  *              buffer of nmdct samples
 
-  *              number of guard bits in the input buffer
 
-  *
 
-  * Outputs:     processed samples in same buffer
 
-  *
 
-  * Return:      none
 
-  *
 
-  * Notes:       operates in-place
 
-  *              if number of guard bits in input is < GBITS_IN_DCT4, the input is 
 
-  *                scaled (>>) before the DCT4 and rescaled (<<, with clipping) after
 
-  *                the DCT4 (rare)
 
-  *              the output has FBITS_LOST_DCT4 fewer fraction bits than the input
 
-  *              the output will always have at least 1 guard bit (GBITS_IN_DCT4 >= 4)
 
-  *              int bits gained per stage (PreMul + FFT + PostMul)
 
-  *                 short blocks = (-5 + 4 + 2) = 1 total
 
-  *                 long blocks =  (-8 + 7 + 2) = 1 total
 
-  **************************************************************************************/
 
- void DCT4(int tabidx, int *coef, int gb)
 
- {
 
- 	int es;
 
- 	/* fast in-place DCT-IV - adds guard bits if necessary */
 
- 	if (gb < GBITS_IN_DCT4) {
 
- 		es = GBITS_IN_DCT4 - gb;
 
- 		PreMultiplyRescale(tabidx, coef, es);
 
- 		R4FFT(tabidx, coef);
 
- 		PostMultiplyRescale(tabidx, coef, es);
 
- 	} else {
 
- 		PreMultiply(tabidx, coef);
 
- 		R4FFT(tabidx, coef);
 
- 		PostMultiply(tabidx, coef);
 
- 	}
 
- }
 
 
  |