Delaying allocation of the SILK temporary output buffer to reduce peak stack

This commit is contained in:
Jean-Marc Valin 2014-01-07 13:46:06 -05:00
parent 5f807c176f
commit c25836205d

View file

@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "API.h" #include "API.h"
#include "main.h" #include "main.h"
#include "stack_alloc.h" #include "stack_alloc.h"
#include "os_support.h"
/************************/ /************************/
/* Decoder Super Struct */ /* Decoder Super Struct */
@ -90,7 +91,8 @@ opus_int silk_Decode( /* O Returns error co
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol; opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 *samplesOut1_tmp[ 2 ]; opus_int16 *samplesOut1_tmp[ 2 ];
VARDECL( opus_int16, samplesOut1_tmp_storage ); VARDECL( opus_int16, samplesOut1_tmp_storage1 );
VARDECL( opus_int16, samplesOut1_tmp_storage2 );
VARDECL( opus_int16, samplesOut2_tmp ); VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 }; opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr; opus_int16 *resample_out_ptr;
@ -98,6 +100,7 @@ opus_int silk_Decode( /* O Returns error co
silk_decoder_state *channel_state = psDec->channel_state; silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side; opus_int has_side;
opus_int stereo_to_mono; opus_int stereo_to_mono;
int delay_stack_alloc;
SAVE_STACK; SAVE_STACK;
silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@ -251,13 +254,22 @@ opus_int silk_Decode( /* O Returns error co
psDec->channel_state[ 1 ].first_frame_after_reset = 1; psDec->channel_state[ 1 ].first_frame_after_reset = 1;
} }
ALLOC( samplesOut1_tmp_storage, /* Check if the temp buffer fits into the output PCM buffer. If it fits,
decControl->nChannelsInternal*( we can delay allocating the temp buffer until after the SILK peak stack
channel_state[ 0 ].frame_length + 2 ), usage. We need to use a < and not a <= because of the two extra samples. */
delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
< decControl->API_sampleRate*decControl->nChannelsAPI;
ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
: decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
opus_int16 ); opus_int16 );
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; if ( delay_stack_alloc )
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage {
+ channel_state[ 0 ].frame_length + 2; samplesOut1_tmp[ 0 ] = samplesOut;
samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
} else {
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
}
if( lostFlag == FLAG_DECODE_NORMAL ) { if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle; has_side = !decode_only_middle;
@ -312,6 +324,15 @@ opus_int silk_Decode( /* O Returns error co
resample_out_ptr = samplesOut; resample_out_ptr = samplesOut;
} }
ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
: ALLOC_NONE,
opus_int16 );
if ( delay_stack_alloc ) {
OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
}
for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
/* Resample decoded signal to API_sampleRate */ /* Resample decoded signal to API_sampleRate */