Sharing twiddle factors across all MDCTs

This commit is contained in:
Jean-Marc Valin 2010-07-07 21:26:38 -04:00
parent 268caad4e8
commit 72513f3cc7
7 changed files with 58 additions and 50 deletions

View file

@ -317,11 +317,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * rest
const int C = CHANNELS(_C); const int C = CHANNELS(_C);
if (C==1 && !shortBlocks) if (C==1 && !shortBlocks)
{ {
const mdct_lookup *lookup = &mode->mdct[LM];
const int overlap = OVERLAP(mode); const int overlap = OVERLAP(mode);
clt_mdct_forward(lookup, in, out, mode->window, overlap); clt_mdct_forward(&mode->mdct, in, out, mode->window, overlap, mode->maxLM-LM);
} else { } else {
const mdct_lookup *lookup = &mode->mdct[LM];
const int overlap = OVERLAP(mode); const int overlap = OVERLAP(mode);
int N = mode->shortMdctSize<<LM; int N = mode->shortMdctSize<<LM;
int B = 1; int B = 1;
@ -331,7 +329,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * rest
SAVE_STACK; SAVE_STACK;
if (shortBlocks) if (shortBlocks)
{ {
lookup = &mode->mdct[0]; /*lookup = &mode->mdct[0];*/
N = mode->shortMdctSize; N = mode->shortMdctSize;
B = shortBlocks; B = shortBlocks;
} }
@ -344,7 +342,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * rest
int j; int j;
for (j=0;j<N+overlap;j++) for (j=0;j<N+overlap;j++)
x[j] = in[C*(b*N+j)+c]; x[j] = in[C*(b*N+j)+c];
clt_mdct_forward(lookup, x, tmp, mode->window, overlap); clt_mdct_forward(&mode->mdct, x, tmp, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);
/* Interleaving the sub-frames */ /* Interleaving the sub-frames */
for (j=0;j<N;j++) for (j=0;j<N;j++)
out[(j*B+b)+c*N*B] = tmp[j]; out[(j*B+b)+c*N*B] = tmp[j];
@ -367,8 +365,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
{ {
int j; int j;
if (transient_shift==0 && C==1 && !shortBlocks) { if (transient_shift==0 && C==1 && !shortBlocks) {
const mdct_lookup *lookup = &mode->mdct[LM]; clt_mdct_backward(&mode->mdct, X, out_mem+C*(MAX_PERIOD-N-N4), mode->window, overlap, mode->maxLM-LM);
clt_mdct_backward(lookup, X, out_mem+C*(MAX_PERIOD-N-N4), mode->window, overlap);
} else { } else {
VARDECL(celt_word32, x); VARDECL(celt_word32, x);
VARDECL(celt_word32, tmp); VARDECL(celt_word32, tmp);
@ -376,7 +373,6 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
int N2 = N; int N2 = N;
int B = 1; int B = 1;
int n4offset=0; int n4offset=0;
const mdct_lookup *lookup = &mode->mdct[LM];
SAVE_STACK; SAVE_STACK;
ALLOC(x, 2*N, celt_word32); ALLOC(x, 2*N, celt_word32);
@ -384,7 +380,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
if (shortBlocks) if (shortBlocks)
{ {
lookup = &mode->mdct[0]; /*lookup = &mode->mdct[0];*/
N2 = mode->shortMdctSize; N2 = mode->shortMdctSize;
B = shortBlocks; B = shortBlocks;
n4offset = N4; n4offset = N4;
@ -397,7 +393,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
/* De-interleaving the sub-frames */ /* De-interleaving the sub-frames */
for (j=0;j<N2;j++) for (j=0;j<N2;j++)
tmp[j] = X[(j*B+b)+c*N2*B]; tmp[j] = X[(j*B+b)+c*N2*B];
clt_mdct_backward(lookup, tmp, x+n4offset+N2*b, mode->window, overlap); clt_mdct_backward(&mode->mdct, tmp, x+n4offset+N2*b, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);
} }
if (transient_shift > 0) if (transient_shift > 0)

View file

@ -58,18 +58,23 @@
#define M_PI 3.141592653 #define M_PI 3.141592653
#endif #endif
void clt_mdct_init(mdct_lookup *l,int N) void clt_mdct_init(mdct_lookup *l,int N, int maxshift)
{ {
int i; int i;
int N2, N4; int N2, N4;
l->n = N; l->n = N;
N2 = N>>1; N2 = N>>1;
N4 = N>>2; N4 = N>>2;
l->kfft = cpx32_fft_alloc(N>>2); l->kfft = celt_alloc(sizeof(kiss_fft_cfg)*(maxshift+1));
l->maxshift = maxshift;
for (i=0;i<=maxshift;i++)
{
l->kfft[i] = cpx32_fft_alloc(N>>2>>i);
#ifndef ENABLE_TI_DSPLIB55 #ifndef ENABLE_TI_DSPLIB55
if (l->kfft==NULL) if (l->kfft[i]==NULL)
return; return;
#endif #endif
}
l->trig = (kiss_twiddle_scalar*)celt_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); l->trig = (kiss_twiddle_scalar*)celt_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
if (l->trig==NULL) if (l->trig==NULL)
return; return;
@ -90,11 +95,14 @@ void clt_mdct_init(mdct_lookup *l,int N)
void clt_mdct_clear(mdct_lookup *l) void clt_mdct_clear(mdct_lookup *l)
{ {
cpx32_fft_free(l->kfft); int i;
for (i=0;i<=l->maxshift;i++)
cpx32_fft_free(l->kfft[i]);
celt_free(l->kfft);
celt_free(l->trig); celt_free(l->trig);
} }
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 *window, int overlap) void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 *window, int overlap, int shift)
{ {
int i; int i;
int N, N2, N4; int N, N2, N4;
@ -102,6 +110,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
VARDECL(kiss_fft_scalar, f); VARDECL(kiss_fft_scalar, f);
SAVE_STACK; SAVE_STACK;
N = l->n; N = l->n;
N >>= shift;
N2 = N>>1; N2 = N>>1;
N4 = N>>2; N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar); ALLOC(f, N2, kiss_fft_scalar);
@ -161,8 +170,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
kiss_fft_scalar re, im, yr, yi; kiss_fft_scalar re, im, yr, yi;
re = yp[0]; re = yp[0];
im = yp[1]; im = yp[1];
yr = -S_MUL(re,t[i]) - S_MUL(im,t[N4-i]); yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
yi = -S_MUL(im,t[i]) + S_MUL(re,t[N4-i]); yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
/* works because the cos is nearly one */ /* works because the cos is nearly one */
*yp++ = yr + S_MUL(yi,sine); *yp++ = yr + S_MUL(yi,sine);
*yp++ = yi - S_MUL(yr,sine); *yp++ = yi - S_MUL(yr,sine);
@ -170,7 +179,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
} }
/* N/4 complex FFT, down-scales by 4/N */ /* N/4 complex FFT, down-scales by 4/N */
cpx32_fft(l->kfft, out, f, N4); cpx32_fft(l->kfft[shift], out, f, N4);
/* Post-rotate */ /* Post-rotate */
{ {
@ -183,8 +192,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
for(i=0;i<N4;i++) for(i=0;i<N4;i++)
{ {
kiss_fft_scalar yr, yi; kiss_fft_scalar yr, yi;
yr = S_MUL(fp[1],t[N4-i]) + S_MUL(fp[0],t[i]); yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
yi = S_MUL(fp[0],t[N4-i]) - S_MUL(fp[1],t[i]); yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
/* works because the cos is nearly one */ /* works because the cos is nearly one */
*yp1 = yr - S_MUL(yi,sine); *yp1 = yr - S_MUL(yi,sine);
*yp2 = yi + S_MUL(yr,sine);; *yp2 = yi + S_MUL(yr,sine);;
@ -197,7 +206,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
} }
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 * restrict window, int overlap) void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 * restrict window, int overlap, int shift)
{ {
int i; int i;
int N, N2, N4; int N, N2, N4;
@ -206,6 +215,7 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
VARDECL(kiss_fft_scalar, f2); VARDECL(kiss_fft_scalar, f2);
SAVE_STACK; SAVE_STACK;
N = l->n; N = l->n;
N >>= shift;
N2 = N>>1; N2 = N>>1;
N4 = N>>2; N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar); ALLOC(f, N2, kiss_fft_scalar);
@ -227,8 +237,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
for(i=0;i<N4;i++) for(i=0;i<N4;i++)
{ {
kiss_fft_scalar yr, yi; kiss_fft_scalar yr, yi;
yr = -S_MUL(*xp2, t[i]) + S_MUL(*xp1,t[N4-i]); yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
yi = -S_MUL(*xp2, t[N4-i]) - S_MUL(*xp1,t[i]); yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
/* works because the cos is nearly one */ /* works because the cos is nearly one */
*yp++ = yr - S_MUL(yi,sine); *yp++ = yr - S_MUL(yi,sine);
*yp++ = yi + S_MUL(yr,sine); *yp++ = yi + S_MUL(yr,sine);
@ -238,7 +248,7 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
} }
/* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
cpx32_ifft(l->kfft, f2, f, N4); cpx32_ifft(l->kfft[shift], f2, f, N4);
/* Post-rotate */ /* Post-rotate */
{ {
@ -251,8 +261,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
re = fp[0]; re = fp[0];
im = fp[1]; im = fp[1];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */ /* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t[i]) - S_MUL(im,t[N4-i]); yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
yi = S_MUL(im,t[i]) + S_MUL(re,t[N4-i]); yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
/* works because the cos is nearly one */ /* works because the cos is nearly one */
*fp++ = yr - S_MUL(yi,sine); *fp++ = yr - S_MUL(yi,sine);
*fp++ = yi + S_MUL(yr,sine); *fp++ = yi + S_MUL(yr,sine);

View file

@ -51,18 +51,19 @@
typedef struct { typedef struct {
int n; int n;
kiss_fft_cfg kfft; int maxshift;
kiss_fft_cfg *kfft;
kiss_twiddle_scalar * restrict trig; kiss_twiddle_scalar * restrict trig;
} mdct_lookup; } mdct_lookup;
void clt_mdct_init(mdct_lookup *l,int N); void clt_mdct_init(mdct_lookup *l,int N, int maxshift);
void clt_mdct_clear(mdct_lookup *l); void clt_mdct_clear(mdct_lookup *l);
/** Compute a forward MDCT and scale by 4/N */ /** Compute a forward MDCT and scale by 4/N */
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 *window, int overlap); void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 *window, int overlap, int shift);
/** Compute a backward MDCT (no scaling) and performs weighted overlap-add /** Compute a backward MDCT (no scaling) and performs weighted overlap-add
(scales implicitly by 1/2) */ (scales implicitly by 1/2) */
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 * restrict window, int overlap); void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 * restrict window, int overlap, int shift);
#endif #endif

View file

@ -261,6 +261,7 @@ static void compute_allocation_table(CELTMode *mode, int res)
CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error) CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error)
{ {
int i; int i;
int LM;
#ifdef STDIN_TUNING #ifdef STDIN_TUNING
scanf("%d ", &MIN_BINS); scanf("%d ", &MIN_BINS);
scanf("%d ", &BITALLOC_SIZE); scanf("%d ", &BITALLOC_SIZE);
@ -337,18 +338,20 @@ CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error)
if (frame_size >= 640 && (frame_size%16)==0) if (frame_size >= 640 && (frame_size%16)==0)
{ {
mode->nbShortMdcts = 8; LM = 3;
} else if (frame_size >= 320 && (frame_size%8)==0) } else if (frame_size >= 320 && (frame_size%8)==0)
{ {
mode->nbShortMdcts = 4; LM = 2;
} else if (frame_size >= 160 && (frame_size%4)==0) } else if (frame_size >= 160 && (frame_size%4)==0)
{ {
mode->nbShortMdcts = 2; LM = 1;
} else } else
{ {
mode->nbShortMdcts = 1; LM = 0;
} }
mode->maxLM = LM;
mode->nbShortMdcts = 1<<LM;
mode->shortMdctSize = frame_size/mode->nbShortMdcts; mode->shortMdctSize = frame_size/mode->nbShortMdcts;
res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize); res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize);
@ -402,16 +405,14 @@ CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error)
mode->logN = logN; mode->logN = logN;
#endif /* !STATIC_MODES */ #endif /* !STATIC_MODES */
for (i=0;(1<<i)<=mode->nbShortMdcts;i++) clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, LM);
{ if ((mode->mdct.trig==NULL)
clt_mdct_init(&mode->mdct[i], 2*mode->shortMdctSize<<i);
if ((mode->mdct[i].trig==NULL)
#ifndef ENABLE_TI_DSPLIB55 #ifndef ENABLE_TI_DSPLIB55
|| (mode->mdct[i].kfft==NULL) || (mode->mdct.kfft==NULL)
#endif #endif
) )
goto failure; goto failure;
}
mode->prob = quant_prob_alloc(mode); mode->prob = quant_prob_alloc(mode);
if (mode->prob==NULL) if (mode->prob==NULL)
goto failure; goto failure;
@ -487,8 +488,7 @@ void celt_mode_destroy(CELTMode *mode)
celt_free((celt_int16*)mode->logN); celt_free((celt_int16*)mode->logN);
#endif #endif
for (i=0;(1<<i)<=mode->nbShortMdcts;i++) clt_mdct_clear(&mode->mdct);
clt_mdct_clear(&mode->mdct[i]);
quant_prob_free(mode->prob); quant_prob_free(mode->prob);
mode->marker_end = MODEFREED; mode->marker_end = MODEFREED;

View file

@ -98,10 +98,11 @@ struct CELTMode {
const celt_int16 * const *(_bits[MAX_CONFIG_SIZES]); /**< Cache for pulses->bits mapping in each band */ const celt_int16 * const *(_bits[MAX_CONFIG_SIZES]); /**< Cache for pulses->bits mapping in each band */
/* Stuff that could go in the {en,de}coder, but we save space this way */ /* Stuff that could go in the {en,de}coder, but we save space this way */
mdct_lookup mdct[MAX_CONFIG_SIZES]; mdct_lookup mdct;
const celt_word16 *window; const celt_word16 *window;
int maxLM;
int nbShortMdcts; int nbShortMdcts;
int shortMdctSize; int shortMdctSize;

View file

@ -45,7 +45,7 @@
/** CELT wrapper for calloc(). To do your own dynamic allocation, all you need to do is replace this function, celt_realloc and celt_free /** CELT wrapper for calloc(). To do your own dynamic allocation, all you need to do is replace this function, celt_realloc and celt_free
NOTE: celt_alloc needs to CLEAR THE MEMORY */ NOTE: celt_alloc needs to CLEAR THE MEMORY */
#ifndef OVERRIDE_CELT_ALLOC #ifndef OVERRIDE_CELT_ALLOC
static inline void *celt_alloc (int size) static void *celt_alloc (int size)
{ {
/* WARNING: this is not equivalent to malloc(). If you want to use malloc() /* WARNING: this is not equivalent to malloc(). If you want to use malloc()
or your own allocator, YOU NEED TO CLEAR THE MEMORY ALLOCATED. Otherwise or your own allocator, YOU NEED TO CLEAR THE MEMORY ALLOCATED. Otherwise

View file

@ -89,7 +89,7 @@ void test1d(int nfft,int isinverse)
celt_word16 * window= (celt_word16*)malloc(sizeof(celt_word16)*nfft/2); celt_word16 * window= (celt_word16*)malloc(sizeof(celt_word16)*nfft/2);
int k; int k;
clt_mdct_init(&cfg, nfft); clt_mdct_init(&cfg, nfft, 0);
for (k=0;k<nfft;++k) { for (k=0;k<nfft;++k) {
in[k] = (rand() % 32768) - 16384; in[k] = (rand() % 32768) - 16384;
} }
@ -116,10 +116,10 @@ void test1d(int nfft,int isinverse)
{ {
for (k=0;k<nfft;++k) for (k=0;k<nfft;++k)
out[k] = 0; out[k] = 0;
clt_mdct_backward(&cfg,in,out, window, nfft/2); clt_mdct_backward(&cfg,in,out, window, nfft/2, 0);
check_inv(in,out,nfft,isinverse); check_inv(in,out,nfft,isinverse);
} else { } else {
clt_mdct_forward(&cfg,in,out,window, nfft/2); clt_mdct_forward(&cfg,in,out,window, nfft/2, 0);
check(in,out,nfft,isinverse); check(in,out,nfft,isinverse);
} }
/*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/