Use dynamic stack allocations in SILK decoder.

This allows the decoder to be compiled with
 NONTHREADSAFE_PSEUDOSTACK to move the memory for large buffers off
 the stack for devices where stack space is very limited.
This patch only attempts to do this for the decoder.
The encoder still requires more than 10 kB of stack.
This commit is contained in:
Timothy B. Terriberry 2012-09-05 07:35:49 -07:00 committed by Jean-Marc Valin
parent e7f668b3d4
commit 6f2d9f5068
4 changed files with 56 additions and 21 deletions

View file

@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
#include "main.h"
#include "stack_alloc.h"
#include "PLC.h"
#define NB_ATT 2
@ -178,12 +179,17 @@ static inline void silk_PLC_conceal(
opus_int16 rand_scale_Q14;
opus_int16 *B_Q14, *exc_buf_ptr;
opus_int32 *sLPC_Q14_ptr;
opus_int16 exc_buf[ 2 * MAX_SUB_FRAME_LENGTH ];
VARDECL( opus_int16, exc_buf );
opus_int16 A_Q12[ MAX_LPC_ORDER ];
opus_int16 sLTP[ MAX_FRAME_LENGTH ];
opus_int32 sLTP_Q14[ 2 * MAX_FRAME_LENGTH ];
VARDECL( opus_int16, sLTP );
VARDECL( opus_int32, sLTP_Q14 );
silk_PLC_struct *psPLC = &psDec->sPLC;
opus_int32 prevGain_Q10[2];
SAVE_STACK;
ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 );
ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6);
prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6);
@ -354,6 +360,7 @@ static inline void silk_PLC_conceal(
for( i = 0; i < MAX_NB_SUBFR; i++ ) {
psDecCtrl->pitchL[ i ] = lag;
}
RESTORE_STACK;
}
/* Glues concealed frames with new good received frames */

View file

@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
#include "API.h"
#include "main.h"
#include "stack_alloc.h"
/************************/
/* Decoder Super Struct */
@ -85,14 +86,16 @@ opus_int silk_Decode( /* O Returns error co
{
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ];
opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];
opus_int16 *samplesOut1_tmp[ 2 ];
VARDECL( opus_int16, samplesOut1_tmp_storage );
VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr;
silk_decoder *psDec = ( silk_decoder * )decState;
silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side;
opus_int stereo_to_mono;
SAVE_STACK;
/**********************************/
/* Test if first frame in payload */
@ -132,11 +135,13 @@ opus_int silk_Decode( /* O Returns error co
channel_state[ n ].nb_subfr = 4;
} else {
silk_assert( 0 );
RESTORE_STACK;
return SILK_DEC_INVALID_FRAME_SIZE;
}
fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
silk_assert( 0 );
RESTORE_STACK;
return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
}
ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
@ -153,6 +158,7 @@ opus_int silk_Decode( /* O Returns error co
if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
RESTORE_STACK;
return( ret );
}
@ -240,6 +246,14 @@ opus_int silk_Decode( /* O Returns error co
psDec->channel_state[ 1 ].first_frame_after_reset = 1;
}
ALLOC( samplesOut1_tmp_storage,
decControl->nChannelsInternal*(
channel_state[ 0 ].frame_length + 2 ),
opus_int16 );
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+ channel_state[ 0 ].frame_length + 2;
if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle;
} else {
@ -285,6 +299,8 @@ opus_int silk_Decode( /* O Returns error co
*nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
/* Set up pointers to temp buffers */
ALLOC( samplesOut2_tmp,
decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 );
if( decControl->nChannelsAPI == 2 ) {
resample_out_ptr = samplesOut2_tmp;
} else {
@ -337,6 +353,7 @@ opus_int silk_Decode( /* O Returns error co
} else {
psDec->prev_decode_only_middle = decode_only_middle;
}
RESTORE_STACK;
return ret;
}

View file

@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
#include "main.h"
#include "stack_alloc.h"
/**********************************************************/
/* Core decoder. Performs inverse NSQ operation LTP + LPC */
@ -43,15 +44,21 @@ void silk_decode_core(
{
opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ];
opus_int16 sLTP[ MAX_FRAME_LENGTH ];
opus_int32 sLTP_Q15[ 2 * MAX_FRAME_LENGTH ];
VARDECL( opus_int16, sLTP );
VARDECL( opus_int32, sLTP_Q15 );
opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10;
opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14;
opus_int32 res_Q14[ MAX_SUB_FRAME_LENGTH ];
opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];
VARDECL( opus_int32, res_Q14 );
VARDECL( opus_int32, sLPC_Q14 );
SAVE_STACK;
silk_assert( psDec->prev_gain_Q16 != 0 );
ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 );
ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 );
ALLOC( res_Q14, psDec->subfr_length, opus_int32 );
ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 );
offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ];
if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) {
@ -227,4 +234,5 @@ void silk_decode_core(
/* Save LPC state */
silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) );
RESTORE_STACK;
}

View file

@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
#include "main.h"
#include "stack_alloc.h"
#include "PLC.h"
/****************/
@ -44,12 +45,16 @@ opus_int silk_decode_frame(
opus_int condCoding /* I The type of conditional coding to use */
)
{
silk_decoder_control sDecCtrl;
VARDECL( silk_decoder_control, psDecCtrl );
opus_int L, mv_len, ret = 0;
opus_int pulses[ MAX_FRAME_LENGTH ];
VARDECL( opus_int, pulses );
SAVE_STACK;
L = psDec->frame_length;
sDecCtrl.LTP_scale_Q14 = 0;
ALLOC( psDecCtrl, 1, silk_decoder_control );
ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
psDecCtrl->LTP_scale_Q14 = 0;
/* Safety checks */
silk_assert( L > 0 && L <= MAX_FRAME_LENGTH );
@ -71,20 +76,17 @@ opus_int silk_decode_frame(
/********************************************/
/* Decode parameters and pulse signal */
/********************************************/
silk_decode_parameters( psDec, &sDecCtrl, condCoding );
/* Update length. Sampling frequency may have changed */
L = psDec->frame_length;
silk_decode_parameters( psDec, psDecCtrl, condCoding );
/********************************************************/
/* Run inverse NSQ */
/********************************************************/
silk_decode_core( psDec, &sDecCtrl, pOut, pulses );
silk_decode_core( psDec, psDecCtrl, pOut, pulses );
/********************************************************/
/* Update PLC state */
/********************************************************/
silk_PLC( psDec, &sDecCtrl, pOut, 0 );
silk_PLC( psDec, psDecCtrl, pOut, 0 );
psDec->lossCnt = 0;
psDec->prevSignalType = psDec->indices.signalType;
@ -94,7 +96,7 @@ opus_int silk_decode_frame(
psDec->first_frame_after_reset = 0;
} else {
/* Handle packet loss by extrapolation */
silk_PLC( psDec, &sDecCtrl, pOut, 1 );
silk_PLC( psDec, psDecCtrl, pOut, 1 );
}
/*************************/
@ -113,13 +115,14 @@ opus_int silk_decode_frame(
/************************************************/
/* Comfort noise generation / estimation */
/************************************************/
silk_CNG( psDec, &sDecCtrl, pOut, L );
silk_CNG( psDec, psDecCtrl, pOut, L );
/* Update some decoder state variables */
psDec->lagPrev = sDecCtrl.pitchL[ psDec->nb_subfr - 1 ];
psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
/* Set output frame length */
*pN = L;
RESTORE_STACK;
return ret;
}