Delaying allocation of the SILK temporary output buffer to reduce peak stack

This commit is contained in:
Jean-Marc Valin 2014-01-07 13:46:06 -05:00
parent 5f807c176f
commit c25836205d

View file

@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "API.h"
#include "main.h"
#include "stack_alloc.h"
#include "os_support.h"
/************************/
/* Decoder Super Struct */
@ -90,7 +91,8 @@ opus_int silk_Decode( /* O Returns error co
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 *samplesOut1_tmp[ 2 ];
VARDECL( opus_int16, samplesOut1_tmp_storage );
VARDECL( opus_int16, samplesOut1_tmp_storage1 );
VARDECL( opus_int16, samplesOut1_tmp_storage2 );
VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr;
@ -98,6 +100,7 @@ opus_int silk_Decode( /* O Returns error co
silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side;
opus_int stereo_to_mono;
int delay_stack_alloc;
SAVE_STACK;
silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@ -251,13 +254,22 @@ opus_int silk_Decode( /* O Returns error co
psDec->channel_state[ 1 ].first_frame_after_reset = 1;
}
ALLOC( samplesOut1_tmp_storage,
decControl->nChannelsInternal*(
channel_state[ 0 ].frame_length + 2 ),
/* Check if the temp buffer fits into the output PCM buffer. If it fits,
we can delay allocating the temp buffer until after the SILK peak stack
usage. We need to use a < and not a <= because of the two extra samples. */
delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
< decControl->API_sampleRate*decControl->nChannelsAPI;
ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
: decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
opus_int16 );
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+ channel_state[ 0 ].frame_length + 2;
if ( delay_stack_alloc )
{
samplesOut1_tmp[ 0 ] = samplesOut;
samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
} else {
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
}
if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle;
@ -312,6 +324,15 @@ opus_int silk_Decode( /* O Returns error co
resample_out_ptr = samplesOut;
}
ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
: ALLOC_NONE,
opus_int16 );
if ( delay_stack_alloc ) {
OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
}
for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
/* Resample decoded signal to API_sampleRate */