123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423 |
- /***********************************************************************
- Copyright (c) 2006-2011, Skype Limited. All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
- - Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- - Neither the name of Internet Society, IETF or IETF Trust, nor the
- names of specific contributors, may be used to endorse or promote
- products derived from this software without specific prior written
- permission.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
- ***********************************************************************/
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #ifdef FIXED_POINT
- #include "main_FIX.h"
- #define silk_encoder_state_Fxx silk_encoder_state_FIX
- #else
- #include "main_FLP.h"
- #define silk_encoder_state_Fxx silk_encoder_state_FLP
- #endif
- #include "stack_alloc.h"
- #include "tuning_parameters.h"
- #include "pitch_est_defines.h"
- static opus_int silk_setup_resamplers(
- silk_encoder_state_Fxx *psEnc, /* I/O */
- opus_int fs_kHz /* I */
- );
- static opus_int silk_setup_fs(
- silk_encoder_state_Fxx *psEnc, /* I/O */
- opus_int fs_kHz, /* I */
- opus_int PacketSize_ms /* I */
- );
- static opus_int silk_setup_complexity(
- silk_encoder_state *psEncC, /* I/O */
- opus_int Complexity /* I */
- );
- static OPUS_INLINE opus_int silk_setup_LBRR(
- silk_encoder_state *psEncC, /* I/O */
- const silk_EncControlStruct *encControl /* I */
- );
- /* Control encoder */
- opus_int silk_control_encoder(
- silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */
- silk_EncControlStruct *encControl, /* I Control structure */
- const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */
- const opus_int channelNb, /* I Channel number */
- const opus_int force_fs_kHz
- )
- {
- opus_int fs_kHz, ret = 0;
- psEnc->sCmn.useDTX = encControl->useDTX;
- psEnc->sCmn.useCBR = encControl->useCBR;
- psEnc->sCmn.API_fs_Hz = encControl->API_sampleRate;
- psEnc->sCmn.maxInternal_fs_Hz = encControl->maxInternalSampleRate;
- psEnc->sCmn.minInternal_fs_Hz = encControl->minInternalSampleRate;
- psEnc->sCmn.desiredInternal_fs_Hz = encControl->desiredInternalSampleRate;
- psEnc->sCmn.useInBandFEC = encControl->useInBandFEC;
- psEnc->sCmn.nChannelsAPI = encControl->nChannelsAPI;
- psEnc->sCmn.nChannelsInternal = encControl->nChannelsInternal;
- psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch;
- psEnc->sCmn.channelNb = channelNb;
- if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) {
- if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) {
- /* Change in API sampling rate in the middle of encoding a packet */
- ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz );
- }
- return ret;
- }
- /* Beyond this point we know that there are no previously coded frames in the payload buffer */
- /********************************************/
- /* Determine internal sampling rate */
- /********************************************/
- fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl );
- if( force_fs_kHz ) {
- fs_kHz = force_fs_kHz;
- }
- /********************************************/
- /* Prepare resampler and buffered data */
- /********************************************/
- ret += silk_setup_resamplers( psEnc, fs_kHz );
- /********************************************/
- /* Set internal sampling frequency */
- /********************************************/
- ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms );
- /********************************************/
- /* Set encoding complexity */
- /********************************************/
- ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity );
- /********************************************/
- /* Set packet loss rate measured by farend */
- /********************************************/
- psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage;
- /********************************************/
- /* Set LBRR usage */
- /********************************************/
- ret += silk_setup_LBRR( &psEnc->sCmn, encControl );
- psEnc->sCmn.controlled_since_last_payload = 1;
- return ret;
- }
- static opus_int silk_setup_resamplers(
- silk_encoder_state_Fxx *psEnc, /* I/O */
- opus_int fs_kHz /* I */
- )
- {
- opus_int ret = SILK_NO_ERROR;
- SAVE_STACK;
- if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )
- {
- if( psEnc->sCmn.fs_kHz == 0 ) {
- /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
- ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
- } else {
- VARDECL( opus_int16, x_buf_API_fs_Hz );
- VARDECL( silk_resampler_state_struct, temp_resampler_state );
- #ifdef FIXED_POINT
- opus_int16 *x_bufFIX = psEnc->x_buf;
- #else
- VARDECL( opus_int16, x_bufFIX );
- opus_int32 new_buf_samples;
- #endif
- opus_int32 api_buf_samples;
- opus_int32 old_buf_samples;
- opus_int32 buf_length_ms;
- buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS;
- old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz;
- #ifndef FIXED_POINT
- new_buf_samples = buf_length_ms * fs_kHz;
- ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ),
- opus_int16 );
- silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples );
- #endif
- /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
- ALLOC( temp_resampler_state, 1, silk_resampler_state_struct );
- ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );
- /* Calculate number of samples to temporarily upsample */
- api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 );
- /* Temporary resampling of x_buf data to API_fs_Hz */
- ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 );
- ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples );
- /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
- ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );
- /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
- ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples );
- #ifndef FIXED_POINT
- silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples);
- #endif
- }
- }
- psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;
- RESTORE_STACK;
- return ret;
- }
- static opus_int silk_setup_fs(
- silk_encoder_state_Fxx *psEnc, /* I/O */
- opus_int fs_kHz, /* I */
- opus_int PacketSize_ms /* I */
- )
- {
- opus_int ret = SILK_NO_ERROR;
- /* Set packet size */
- if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) {
- if( ( PacketSize_ms != 10 ) &&
- ( PacketSize_ms != 20 ) &&
- ( PacketSize_ms != 40 ) &&
- ( PacketSize_ms != 60 ) ) {
- ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
- }
- if( PacketSize_ms <= 10 ) {
- psEnc->sCmn.nFramesPerPacket = 1;
- psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1;
- psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz );
- psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
- if( psEnc->sCmn.fs_kHz == 8 ) {
- psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
- } else {
- psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
- }
- } else {
- psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS );
- psEnc->sCmn.nb_subfr = MAX_NB_SUBFR;
- psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz );
- psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
- if( psEnc->sCmn.fs_kHz == 8 ) {
- psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
- } else {
- psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
- }
- }
- psEnc->sCmn.PacketSize_ms = PacketSize_ms;
- psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */
- }
- /* Set internal sampling frequency */
- celt_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
- celt_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
- if( psEnc->sCmn.fs_kHz != fs_kHz ) {
- /* reset part of the state */
- silk_memset( &psEnc->sShape, 0, sizeof( psEnc->sShape ) );
- silk_memset( &psEnc->sCmn.sNSQ, 0, sizeof( psEnc->sCmn.sNSQ ) );
- silk_memset( psEnc->sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
- silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
- psEnc->sCmn.inputBufIx = 0;
- psEnc->sCmn.nFramesEncoded = 0;
- psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */
- /* Initialize non-zero parameters */
- psEnc->sCmn.prevLag = 100;
- psEnc->sCmn.first_frame_after_reset = 1;
- psEnc->sShape.LastGainIndex = 10;
- psEnc->sCmn.sNSQ.lagPrev = 100;
- psEnc->sCmn.sNSQ.prev_gain_Q16 = 65536;
- psEnc->sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
- psEnc->sCmn.fs_kHz = fs_kHz;
- if( psEnc->sCmn.fs_kHz == 8 ) {
- if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
- psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
- } else {
- psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
- }
- } else {
- if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
- psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
- } else {
- psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
- }
- }
- if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) {
- psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER;
- psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_NB_MB;
- } else {
- psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER;
- psEnc->sCmn.psNLSF_CB = &silk_NLSF_CB_WB;
- }
- psEnc->sCmn.subfr_length = SUB_FRAME_LENGTH_MS * fs_kHz;
- psEnc->sCmn.frame_length = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
- psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
- psEnc->sCmn.la_pitch = silk_SMULBB( LA_PITCH_MS, fs_kHz );
- psEnc->sCmn.max_pitch_lag = silk_SMULBB( 18, fs_kHz );
- if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
- psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
- } else {
- psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
- }
- if( psEnc->sCmn.fs_kHz == 16 ) {
- psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
- } else if( psEnc->sCmn.fs_kHz == 12 ) {
- psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
- } else {
- psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
- }
- }
- /* Check that settings are valid */
- celt_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );
- return ret;
- }
- static opus_int silk_setup_complexity(
- silk_encoder_state *psEncC, /* I/O */
- opus_int Complexity /* I */
- )
- {
- opus_int ret = 0;
- /* Set encoding complexity */
- celt_assert( Complexity >= 0 && Complexity <= 10 );
- if( Complexity < 1 ) {
- psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX;
- psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 );
- psEncC->pitchEstimationLPCOrder = 6;
- psEncC->shapingLPCOrder = 12;
- psEncC->la_shape = 3 * psEncC->fs_kHz;
- psEncC->nStatesDelayedDecision = 1;
- psEncC->useInterpolatedNLSFs = 0;
- psEncC->NLSF_MSVQ_Survivors = 2;
- psEncC->warping_Q16 = 0;
- } else if( Complexity < 2 ) {
- psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
- psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 );
- psEncC->pitchEstimationLPCOrder = 8;
- psEncC->shapingLPCOrder = 14;
- psEncC->la_shape = 5 * psEncC->fs_kHz;
- psEncC->nStatesDelayedDecision = 1;
- psEncC->useInterpolatedNLSFs = 0;
- psEncC->NLSF_MSVQ_Survivors = 3;
- psEncC->warping_Q16 = 0;
- } else if( Complexity < 3 ) {
- psEncC->pitchEstimationComplexity = SILK_PE_MIN_COMPLEX;
- psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.8, 16 );
- psEncC->pitchEstimationLPCOrder = 6;
- psEncC->shapingLPCOrder = 12;
- psEncC->la_shape = 3 * psEncC->fs_kHz;
- psEncC->nStatesDelayedDecision = 2;
- psEncC->useInterpolatedNLSFs = 0;
- psEncC->NLSF_MSVQ_Survivors = 2;
- psEncC->warping_Q16 = 0;
- } else if( Complexity < 4 ) {
- psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
- psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.76, 16 );
- psEncC->pitchEstimationLPCOrder = 8;
- psEncC->shapingLPCOrder = 14;
- psEncC->la_shape = 5 * psEncC->fs_kHz;
- psEncC->nStatesDelayedDecision = 2;
- psEncC->useInterpolatedNLSFs = 0;
- psEncC->NLSF_MSVQ_Survivors = 4;
- psEncC->warping_Q16 = 0;
- } else if( Complexity < 6 ) {
- psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
- psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.74, 16 );
- psEncC->pitchEstimationLPCOrder = 10;
- psEncC->shapingLPCOrder = 16;
- psEncC->la_shape = 5 * psEncC->fs_kHz;
- psEncC->nStatesDelayedDecision = 2;
- psEncC->useInterpolatedNLSFs = 1;
- psEncC->NLSF_MSVQ_Survivors = 6;
- psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
- } else if( Complexity < 8 ) {
- psEncC->pitchEstimationComplexity = SILK_PE_MID_COMPLEX;
- psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.72, 16 );
- psEncC->pitchEstimationLPCOrder = 12;
- psEncC->shapingLPCOrder = 20;
- psEncC->la_shape = 5 * psEncC->fs_kHz;
- psEncC->nStatesDelayedDecision = 3;
- psEncC->useInterpolatedNLSFs = 1;
- psEncC->NLSF_MSVQ_Survivors = 8;
- psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
- } else {
- psEncC->pitchEstimationComplexity = SILK_PE_MAX_COMPLEX;
- psEncC->pitchEstimationThreshold_Q16 = SILK_FIX_CONST( 0.7, 16 );
- psEncC->pitchEstimationLPCOrder = 16;
- psEncC->shapingLPCOrder = 24;
- psEncC->la_shape = 5 * psEncC->fs_kHz;
- psEncC->nStatesDelayedDecision = MAX_DEL_DEC_STATES;
- psEncC->useInterpolatedNLSFs = 1;
- psEncC->NLSF_MSVQ_Survivors = 16;
- psEncC->warping_Q16 = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
- }
- /* Do not allow higher pitch estimation LPC order than predict LPC order */
- psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder );
- psEncC->shapeWinLength = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape;
- psEncC->Complexity = Complexity;
- celt_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER );
- celt_assert( psEncC->shapingLPCOrder <= MAX_SHAPE_LPC_ORDER );
- celt_assert( psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES );
- celt_assert( psEncC->warping_Q16 <= 32767 );
- celt_assert( psEncC->la_shape <= LA_SHAPE_MAX );
- celt_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX );
- return ret;
- }
- static OPUS_INLINE opus_int silk_setup_LBRR(
- silk_encoder_state *psEncC, /* I/O */
- const silk_EncControlStruct *encControl /* I */
- )
- {
- opus_int LBRR_in_previous_packet, ret = SILK_NO_ERROR;
- LBRR_in_previous_packet = psEncC->LBRR_enabled;
- psEncC->LBRR_enabled = encControl->LBRR_coded;
- if( psEncC->LBRR_enabled ) {
- /* Set gain increase for coding LBRR excitation */
- if( LBRR_in_previous_packet == 0 ) {
- /* Previous packet did not have LBRR, and was therefore coded at a higher bitrate */
- psEncC->LBRR_GainIncreases = 7;
- } else {
- psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
- }
- }
- return ret;
- }
|