Initial support for a managed stack/scratchpad. Still needs some work.

This commit is contained in:
Jean-Marc Valin 2008-02-29 15:14:12 +11:00
parent 8b2a59235f
commit 8600f69f79
12 changed files with 114 additions and 18 deletions

View file

@ -118,20 +118,24 @@ void renormalise_bands(const CELTMode *m, celt_norm_t *X)
int i;
VARDECL(celt_ener_t *tmpE);
VARDECL(celt_sig_t *freq);
SAVE_STACK;
ALLOC(tmpE, m->nbEBands*m->nbChannels, celt_ener_t);
ALLOC(freq, m->nbMdctBlocks*m->nbChannels*m->eBands[m->nbEBands+1], celt_sig_t);
for (i=0;i<m->nbMdctBlocks*m->nbChannels*m->eBands[m->nbEBands+1];i++)
freq[i] = SHL32(EXTEND32(X[i]), 10);
compute_band_energies(m, freq, tmpE);
normalise_bands(m, freq, X, tmpE);
RESTORE_STACK;
}
#else
/* Re-derive the band energies of X and hand them, together with X, to
   normalise_bands(). This is the variant compiled when the preceding #if
   condition (not visible here) is false: X is passed straight to
   compute_band_energies() without an intermediate conversion buffer.
   m: mode; supplies nbEBands and nbChannels, which size the scratch buffer.
   X: passed to normalise_bands() as both its second and third argument --
      presumably normalised in place; confirm against normalise_bands(). */
void renormalise_bands(const CELTMode *m, celt_norm_t *X)
{
VARDECL(celt_ener_t *tmpE);
/* SAVE_STACK / RESTORE_STACK bracket the temporary allocation below. */
SAVE_STACK;
/* One energy slot per (band, channel) pair. */
ALLOC(tmpE, m->nbEBands*m->nbChannels, celt_ener_t);
compute_band_energies(m, X, tmpE);
normalise_bands(m, X, X, tmpE);
/* tmpE must not be used past this point. */
RESTORE_STACK;
}
#endif
@ -224,6 +228,7 @@ void quant_bands(const CELTMode *m, celt_norm_t *X, celt_norm_t *P, celt_mask_t
VARDECL(celt_norm_t *norm);
VARDECL(int *pulses);
VARDECL(int *offsets);
SAVE_STACK;
B = m->nbMdctBlocks*m->nbChannels;
@ -277,6 +282,7 @@ void quant_bands(const CELTMode *m, celt_norm_t *X, celt_norm_t *P, celt_mask_t
}
for (i=B*eBands[m->nbEBands];i<B*eBands[m->nbEBands+1];i++)
X[i] = 0;
RESTORE_STACK;
}
/* Decoding of the residual */
@ -288,6 +294,7 @@ void unquant_bands(const CELTMode *m, celt_norm_t *X, celt_norm_t *P, int total_
VARDECL(celt_norm_t *norm);
VARDECL(int *pulses);
VARDECL(int *offsets);
SAVE_STACK;
B = m->nbMdctBlocks*m->nbChannels;
@ -335,6 +342,7 @@ void unquant_bands(const CELTMode *m, celt_norm_t *X, celt_norm_t *P, int total_
}
for (i=B*eBands[m->nbEBands];i<B*eBands[m->nbEBands+1];i++)
X[i] = 0;
RESTORE_STACK;
}
void stereo_mix(const CELTMode *m, celt_norm_t *X, celt_ener_t *bank, int dir)

View file

@ -33,6 +33,8 @@
#include "config.h"
#endif
#define CELT_C
#include "os_support.h"
#include "mdct.h"
#include <math.h>
@ -182,6 +184,7 @@ static celt_word32_t compute_mdcts(mdct_lookup *mdct_lookup, celt_word16_t *wind
celt_word32_t E = 0;
VARDECL(celt_word32_t *x);
VARDECL(celt_word32_t *tmp);
SAVE_STACK;
ALLOC(x, 2*N, celt_word32_t);
ALLOC(tmp, N, celt_word32_t);
for (c=0;c<C;c++)
@ -200,6 +203,7 @@ static celt_word32_t compute_mdcts(mdct_lookup *mdct_lookup, celt_word16_t *wind
out[C*B*j+C*i+c] = tmp[j];
}
}
RESTORE_STACK;
return E;
}
@ -209,6 +213,7 @@ static void compute_inv_mdcts(mdct_lookup *mdct_lookup, celt_word16_t *window, c
int i, c, N4;
VARDECL(celt_word32_t *x);
VARDECL(celt_word32_t *tmp);
SAVE_STACK;
ALLOC(x, 2*N, celt_word32_t);
ALLOC(tmp, N, celt_word32_t);
N4 = (N-overlap)/2;
@ -231,6 +236,7 @@ static void compute_inv_mdcts(mdct_lookup *mdct_lookup, celt_word16_t *window, c
mdct_overlap[C*j+c] = x[N+N4+j];
}
}
RESTORE_STACK;
}
int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, int nbCompressedBytes)
@ -245,6 +251,7 @@ int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, i
VARDECL(celt_norm_t *P);
VARDECL(celt_ener_t *bandE);
VARDECL(celt_pgain_t *gains);
SAVE_STACK;
if (check_mode(st->mode) != CELT_OK)
return CELT_INVALID_MODE;
@ -414,6 +421,7 @@ int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, i
if (nbBytes > nbCompressedBytes)
{
celt_warning_int ("got too many bytes:", nbBytes);
RESTORE_STACK;
return CELT_INTERNAL_ERROR;
}
/*printf ("%d\n", *nbBytes);*/
@ -427,6 +435,7 @@ int celt_encode(CELTEncoder *st, celt_int16_t *pcm, unsigned char *compressed, i
ec_byte_reset(&st->buf);
ec_enc_init(&st->enc,&st->buf);
RESTORE_STACK;
return nbCompressedBytes;
}
@ -539,6 +548,7 @@ static void celt_decode_lost(CELTDecoder *st, short *pcm)
int i, c, N, B, C;
int pitch_index;
VARDECL(celt_sig_t *freq);
SAVE_STACK;
N = st->block_size;
B = st->nb_blocks;
C = st->mode->nbChannels;
@ -567,6 +577,7 @@ static void celt_decode_lost(CELTDecoder *st, short *pcm)
}
}
}
RESTORE_STACK;
}
int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm)
@ -581,6 +592,7 @@ int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm
VARDECL(celt_norm_t *P);
VARDECL(celt_ener_t *bandE);
VARDECL(celt_pgain_t *gains);
SAVE_STACK;
if (check_mode(st->mode) != CELT_OK)
return CELT_INVALID_MODE;
@ -596,10 +608,14 @@ int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm
ALLOC(gains, st->mode->nbPBands, celt_pgain_t);
if (check_mode(st->mode) != CELT_OK)
{
RESTORE_STACK;
return CELT_INVALID_MODE;
}
if (data == NULL)
{
celt_decode_lost(st, pcm);
RESTORE_STACK;
return 0;
}
@ -676,12 +692,14 @@ int celt_decode(CELTDecoder *st, unsigned char *data, int len, celt_int16_t *pcm
if (ec_dec_uint(&dec, 2) != val)
{
celt_warning("decode error");
RESTORE_STACK;
return CELT_CORRUPTED_DATA;
}
val = 1-val;
}
}
RESTORE_STACK;
return 0;
/*printf ("\n");*/
}

View file

@ -111,13 +111,17 @@ static void prev_ncwrs64(celt_uint64_t *nc, int len, int nc0)
celt_uint32_t ncwrs(int _n,int _m)
{
   /* Builds a table nc[0.._n] initialised to 1, applies next_ncwrs32() to it
      _m times, and returns the final nc[_n]. Presumably the 32-bit
      counterpart of ncwrs64() -- confirm against the full file. */
   int i;
   celt_uint32_t ret;
   VARDECL(celt_uint32_t *nc);
   SAVE_STACK;
   ALLOC(nc,_n+1, celt_uint32_t);
   for (i=0;i<_n+1;i++)
      nc[i] = 1;
   for (i=0;i<_m;i++)
      next_ncwrs32(nc, _n+1, 0);
   /* nc is temporary storage released by RESTORE_STACK, so the result is
      copied out first. There must be no return before RESTORE_STACK, or the
      managed stack pointer is never rewound. */
   ret = nc[_n];
   RESTORE_STACK;
   return ret;
}
/*Returns the number of ways of choosing _m elements from a set of size _n with
@ -125,13 +129,17 @@ celt_uint32_t ncwrs(int _n,int _m)
celt_uint64_t ncwrs64(int _n,int _m)
{
   /* Builds a table nc[0.._n] initialised to 1, applies next_ncwrs64() to it
      _m times, and returns the final nc[_n] as a 64-bit count. */
   int i;
   celt_uint64_t ret;
   VARDECL(celt_uint64_t *nc);
   SAVE_STACK;
   ALLOC(nc,_n+1, celt_uint64_t);
   for (i=0;i<_n+1;i++)
      nc[i] = 1;
   for (i=0;i<_m;i++)
      next_ncwrs64(nc, _n+1, 0);
   /* nc is temporary storage released by RESTORE_STACK, so the result is
      copied out first. There must be no return before RESTORE_STACK, or the
      managed stack pointer is never rewound. */
   ret = nc[_n];
   RESTORE_STACK;
   return ret;
}
@ -143,6 +151,7 @@ void cwrsi(int _n,int _m,celt_uint32_t _i,int *_x,int *_s){
int j;
int k;
VARDECL(celt_uint32_t *nc);
SAVE_STACK;
ALLOC(nc,_n+1, celt_uint32_t);
for (j=0;j<_n+1;j++)
nc[j] = 1;
@ -176,6 +185,7 @@ void cwrsi(int _n,int _m,celt_uint32_t _i,int *_x,int *_s){
else
prev_ncwrs32(nc, _n+1, 1);
}
RESTORE_STACK;
}
/*Returns the index of the given combination of _m elements chosen from a set
@ -187,6 +197,7 @@ celt_uint32_t icwrs(int _n,int _m,const int *_x,const int *_s, celt_uint32_t *bo
int j;
int k;
VARDECL(celt_uint32_t *nc);
SAVE_STACK;
ALLOC(nc,_n+1, celt_uint32_t);
for (j=0;j<_n+1;j++)
nc[j] = 1;
@ -218,6 +229,7 @@ celt_uint32_t icwrs(int _n,int _m,const int *_x,const int *_s, celt_uint32_t *bo
}
if((k==0||_x[k]!=_x[k-1])&&_s[k])i+=p>>1;
}
RESTORE_STACK;
return i;
}
@ -229,6 +241,7 @@ void cwrsi64(int _n,int _m,celt_uint64_t _i,int *_x,int *_s){
int j;
int k;
VARDECL(celt_uint64_t *nc);
SAVE_STACK;
ALLOC(nc,_n+1, celt_uint64_t);
for (j=0;j<_n+1;j++)
nc[j] = 1;
@ -262,6 +275,7 @@ void cwrsi64(int _n,int _m,celt_uint64_t _i,int *_x,int *_s){
else
prev_ncwrs64(nc, _n+1, 1);
}
RESTORE_STACK;
}
/*Returns the index of the given combination of _m elements chosen from a set
@ -273,6 +287,7 @@ celt_uint64_t icwrs64(int _n,int _m,const int *_x,const int *_s, celt_uint64_t *
int j;
int k;
VARDECL(celt_uint64_t *nc);
SAVE_STACK;
ALLOC(nc,_n+1, celt_uint64_t);
for (j=0;j<_n+1;j++)
nc[j] = 1;
@ -304,6 +319,7 @@ celt_uint64_t icwrs64(int _n,int _m,const int *_x,const int *_s, celt_uint64_t *
}
if((k==0||_x[k]!=_x[k-1])&&_s[k])i+=p>>1;
}
RESTORE_STACK;
return i;
}
@ -350,6 +366,7 @@ void encode_pulses(int *_y, int N, int K, ec_enc *enc)
{
VARDECL(int *comb);
VARDECL(int *signs);
SAVE_STACK;
ALLOC(comb, K, int);
ALLOC(signs, K, int);
@ -366,12 +383,14 @@ void encode_pulses(int *_y, int N, int K, ec_enc *enc)
id = icwrs64(N, K, comb, signs, &bound);
ec_enc_uint64(enc,id,bound);
}
RESTORE_STACK;
}
void decode_pulses(int *_y, int N, int K, ec_dec *dec)
{
VARDECL(int *comb);
VARDECL(int *signs);
SAVE_STACK;
ALLOC(comb, K, int);
ALLOC(signs, K, int);
@ -383,5 +402,6 @@ void decode_pulses(int *_y, int N, int K, ec_dec *dec)
cwrsi64(N, K, ec_dec_uint64(dec, ncwrs64(N, K)), comb, signs);
comb2pulse(N, K, _y, comb, signs);
}
RESTORE_STACK;
}

View file

@ -91,6 +91,7 @@ void mdct_forward(mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out)
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar *f);
SAVE_STACK;
N = l->n;
N2 = N/2;
N4 = N/4;
@ -126,6 +127,7 @@ void mdct_forward(mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out)
out[2*i] = -S_MUL(f[2*i+1],l->trig[i+N4]) + S_MUL(f[2*i] ,l->trig[i]);
out[N2-1-2*i] = -S_MUL(f[2*i] ,l->trig[i+N4]) - S_MUL(f[2*i+1],l->trig[i]);
}
RESTORE_STACK;
}
@ -134,6 +136,7 @@ void mdct_backward(mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out)
int i;
int N, N2, N4, N8;
VARDECL(kiss_fft_scalar *f);
SAVE_STACK;
N = l->n;
N2 = N/2;
N4 = N/4;
@ -173,6 +176,7 @@ void mdct_backward(mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out)
out[i] =-out[N2-i-1];
out[N-i-1] = out[N2+i];
}
RESTORE_STACK;
}

View file

@ -53,6 +53,7 @@ void find_spectral_pitch(kiss_fftr_cfg fft, struct PsyDecay *decay, celt_sig_t *
VARDECL(celt_word32_t *X);
VARDECL(celt_word32_t *Y);
VARDECL(celt_mask_t *curve);
SAVE_STACK;
int n2 = lag/2;
ALLOC(xx, lag*C, celt_word32_t);
ALLOC(yy, lag*C, celt_word32_t);
@ -109,4 +110,5 @@ void find_spectral_pitch(kiss_fftr_cfg fft, struct PsyDecay *decay, celt_sig_t *
/*printf ("\n");
printf ("%d %f\n", *pitch, max_corr);
printf ("%d\n", *pitch);*/
RESTORE_STACK;
}

View file

@ -126,6 +126,7 @@ void compute_masking(struct PsyDecay *decay, celt_word32_t *X, celt_mask_t *mask
{
int i;
VARDECL(float *psd);
SAVE_STACK;
int N=len/2;
ALLOC(psd, N, float);
psd[0] = X[0]*1.f*X[0];
@ -134,13 +135,14 @@ void compute_masking(struct PsyDecay *decay, celt_word32_t *X, celt_mask_t *mask
/* TODO: Do tone masking */
/* Noise masking */
spreading_func(decay, psd, mask, N);
RESTORE_STACK;
}
void compute_mdct_masking(struct PsyDecay *decay, celt_word32_t *X, celt_mask_t *mask, int len)
{
int i;
VARDECL(float *psd);
SAVE_STACK;
ALLOC(psd, len, float);
for (i=0;i<len;i++)
mask[i] = X[i]*X[i];
@ -152,5 +154,5 @@ void compute_mdct_masking(struct PsyDecay *decay, celt_word32_t *X, celt_mask_t
/* TODO: Do tone masking */
/* Noise masking */
spreading_func(decay, psd, mask, len);
RESTORE_STACK;
}

View file

@ -90,6 +90,7 @@ static void quant_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word1
celt_word16_t prev = 0;
celt_word16_t coef = m->ePredCoef;
VARDECL(celt_word16_t *error);
SAVE_STACK;
/* The .7 is a heuristic */
celt_word16_t beta = MULT16_16_Q15(QCONST16(.7f,15),coef);
@ -157,6 +158,7 @@ static void quant_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word1
/*printf ("%d\n", ec_enc_tell(enc, 0)-9);*/
/*printf ("\n");*/
RESTORE_STACK;
}
static void unquant_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_dec *dec)
@ -210,6 +212,7 @@ static void unquant_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_wor
void quant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_enc *enc)
{
int C;
SAVE_STACK;
C = m->nbChannels;
@ -263,6 +266,7 @@ void quant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBan
celt_fatal("more than 2 channels not supported");
}
#endif
RESTORE_STACK;
}
@ -270,6 +274,7 @@ void quant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBan
void unquant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, ec_dec *dec)
{
int C;
SAVE_STACK;
C = m->nbChannels;
if (C==1)
@ -286,4 +291,5 @@ void unquant_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEB
eBands[C*i+c] = E[i];
}
}
RESTORE_STACK;
}

View file

@ -66,6 +66,7 @@ int quant_pitch(celt_pgain_t *gains, int len, ec_enc *enc)
{
int i, id;
VARDECL(float *g2);
SAVE_STACK;
ALLOC(g2, len, float);
/*for (i=0;i<len;i++) printf ("%f ", gains[i]);printf ("\n");*/
for (i=0;i<len;i++)
@ -75,6 +76,7 @@ int quant_pitch(celt_pgain_t *gains, int len, ec_enc *enc)
/*for (i=0;i<len;i++) printf ("%f ", pgain_table[id*len+i]);printf ("\n");*/
for (i=0;i<len;i++)
gains[i] = PGAIN_SCALING*(sqrt(1-(1-pgain_table[id*len+i])*(1-pgain_table[id*len+i])));
RESTORE_STACK;
return id!=0;
}

View file

@ -192,6 +192,7 @@ int interp_bits2pulses(const CELTMode *m, int *bits1, int *bits2, int total, int
int j;
int firstpass;
VARDECL(int *bits);
SAVE_STACK;
ALLOC(bits, len, int);
lo = 0;
hi = 1<<BITRES;
@ -235,14 +236,16 @@ int interp_bits2pulses(const CELTMode *m, int *bits1, int *bits2, int total, int
break;
}
}
RESTORE_STACK;
return (out+BITROUND) >> BITRES;
}
int compute_allocation(const CELTMode *m, int *offsets, int total, int *pulses)
{
int lo, hi, len;
int lo, hi, len, ret;
VARDECL(int *bits1);
VARDECL(int *bits2);
SAVE_STACK;
len = m->nbEBands;
ALLOC(bits1, len, int);
@ -278,7 +281,9 @@ int compute_allocation(const CELTMode *m, int *offsets, int total, int *pulses)
if (bits2[j] < 0)
bits2[j] = 0;
}
return interp_bits2pulses(m, bits1, bits2, total, pulses, len);
ret = interp_bits2pulses(m, bits1, bits2, total, pulses, len);
RESTORE_STACK;
return ret;
}
}

View file

@ -84,32 +84,52 @@
* @param type Type of element
*/
#if defined(VAR_ARRAYS)
#define VARDECL(var)
#define ALLOC(var, size, type) type var[size]
#define SAVE_STACK
#define RESTORE_STACK
#elif defined(USE_ALLOCA)
#define VARDECL(var) var
#define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
#define SAVE_STACK
#define RESTORE_STACK
#else
#ifdef ENABLE_VALGRIND
#include <valgrind/memcheck.h>
#define ALLOC_STACK(stack) (stack = (stack==0) ? celt_alloc_scratch(30000) : stack, VALGRIND_MAKE_NOACCESS(stack, 1000))
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
#define PUSH(stack, size, type) (VALGRIND_MAKE_NOACCESS(stack, 1000),ALIGN((stack),sizeof(type)),VALGRIND_MAKE_WRITABLE(stack, ((size)*sizeof(type))),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_NOACCESS(global_stack, 1000))
#else
#define ALLOC_STACK(stack) (stack = (stack==0) ? celt_alloc_scratch(30000) : stack)
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)),(stack)+=((size)*sizeof(type)),(type*)((stack)-((size)*sizeof(type))))
#define RESTORE_STACK (global_stack = _saved_stack)
#endif
#if defined(VAR_ARRAYS)
#define VARDECL(var)
#define ALLOC(var, size, type) type var[size]
#elif defined(USE_ALLOCA)
#define VARDECL(var) var
#define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size)))
#ifdef CELT_C
char *global_stack=0;
#else
/*#define VARDECL(var) var
#define ALLOC(var, size, type) var = PUSH(stack, size, type)*/
#error scratchpad not yet supported, you need to define either VAR_ARRAYS or USE_ALLOCA
extern char *global_stack;
#endif
#include "os_support.h"
#define VARDECL(var) var
#define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
#define SAVE_STACK char *_saved_stack; ALLOC_STACK(global_stack);_saved_stack = global_stack;
#endif

View file

@ -106,6 +106,7 @@ int main(int argc, char *argv[])
{
VARDECL(celt_int16_t *in);
VARDECL(celt_int16_t *out);
SAVE_STACK;
ALLOC(in, frame_size*channels, celt_int16_t);
ALLOC(out, frame_size*channels, celt_int16_t);
fread(in, sizeof(short), frame_size*channels, fin);
@ -115,6 +116,7 @@ int main(int argc, char *argv[])
if (len <= 0)
{
fprintf (stderr, "celt_encode() returned %d\n", len);
RESTORE_STACK;
return 1;
}
/* This is to simulate packet loss */
@ -132,6 +134,7 @@ int main(int argc, char *argv[])
count++;
fwrite(out, sizeof(short), (frame_size-skip)*channels, fout);
skip = 0;
RESTORE_STACK;
}
celt_encoder_destroy(enc);
celt_decoder_destroy(dec);

View file

@ -69,6 +69,7 @@ static void mix_pitch_and_residual(int *iy, celt_norm_t *X, int N, int K, celt_n
celt_word32_t Ryp, Ryy, Rpp;
celt_word32_t g;
VARDECL(celt_norm_t *y);
SAVE_STACK;
#ifdef FIXED_POINT
int yshift = 14-EC_ILOG(K);
#endif
@ -104,6 +105,7 @@ static void mix_pitch_and_residual(int *iy, celt_norm_t *X, int N, int K, celt_n
for (i=0;i<N;i++)
X[i] = P[i] + MULT16_32_Q14(y[i], g);
RESTORE_STACK;
}
/** All the info necessary to keep track of a hypothesis during the search */
@ -128,6 +130,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, celt_norm_t *P, cel
VARDECL(celt_norm_t **ny);
VARDECL(int **iy);
VARDECL(int **iny);
SAVE_STACK;
int i, j, k, m;
int pulsesLeft;
VARDECL(celt_word32_t *xy);
@ -342,6 +345,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, celt_norm_t *P, cel
due to the recursive computation used in quantisation.
Not quite sure whether we need that or not */
mix_pitch_and_residual(iy[0], X, N, K, P, alpha);
RESTORE_STACK;
}
/** Decode pulse vector and combine the result with the pitch vector to produce
@ -349,9 +353,11 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, celt_norm_t *P, cel
void alg_unquant(celt_norm_t *X, int N, int K, celt_norm_t *P, celt_word16_t alpha, ec_dec *dec)
{
/* Decoder-side counterpart of alg_quant(): reads a pulse vector from dec and
   combines it with P through mix_pitch_and_residual(), with X as the output
   argument. N and K are forwarded unchanged to both helpers; N is also the
   length of the temporary pulse buffer. */
VARDECL(int *iy);
SAVE_STACK;
/* Scratch buffer for the N decoded pulse values. */
ALLOC(iy, N, int);
decode_pulses(iy, N, K, dec);
mix_pitch_and_residual(iy, X, N, K, P, alpha);
/* iy must not be used past this point. */
RESTORE_STACK;
}