Implemented intensity stereo, which required changes all over the place

to make sure that stereo coupling is done at the band level. Previously,
the stereo coupling was done all at once, which caused all kinds of
interactions with the prediction and folding.
This commit is contained in:
Jean-Marc Valin 2008-05-23 16:57:34 +10:00
parent 7bd2b9e001
commit bf2d648bbd
8 changed files with 123 additions and 96 deletions

View file

@ -289,8 +289,88 @@ void pitch_quant_bands(const CELTMode *m, celt_norm_t * restrict P, const celt_p
P[i] = 0;
}
/* Collapse an interleaved two-channel band into a single channel.
 *
 * X holds 2*len interleaved samples. The even-indexed samples are packed
 * into X[0..len-1] and the upper half X[len..2*len-1] is cleared.
 * In non-FIXED_POINT builds the packed samples are additionally scaled by
 * sqrt(E_even+E_odd)/sqrt(E_even), where E_even/E_odd are the summed
 * squares of the even/odd samples, so the kept channel carries the energy
 * of both. In FIXED_POINT builds no such gain is applied.
 */
static void intensity_band(celt_norm_t * restrict X, int len)
{
   int k;
   /* Tiny bias keeps the float division below away from 0/0. */
   celt_word32_t kept_energy = 1e-15;
   celt_word32_t dropped_energy = 1e-15;

   k = 0;
   while (k < len)
   {
      /* Index 2*k+1 is always above k, so it has not been overwritten yet. */
      const celt_norm_t even = X[2*k];
      const celt_norm_t odd = X[2*k+1];
      X[k] = even;
      kept_energy += MULT16_16(even,even);
      dropped_energy += MULT16_16(odd,odd);
      k++;
   }
#ifndef FIXED_POINT
   {
      const celt_word32_t gain = celt_sqrt(kept_energy+dropped_energy)/celt_sqrt(kept_energy);
      for (k=0;k<len;k++)
         X[k] *= gain;
   }
#endif
   /* Clear the half of the band that no longer carries data. */
   for (k=len;k<2*len;k++)
      X[k] = 0;
}
/* Expand a single-channel band into an interleaved two-channel band.
 *
 * On entry X[0..len-1] holds len samples; on return X[0..2*len-1] holds
 * each input sample scaled by 0.70711 (about 1/sqrt(2)) and written to both
 * X[2*j] and X[2*j+1]. The expansion runs from the last sample down so
 * that not-yet-processed inputs are never overwritten.
 */
static void dup_band(celt_norm_t * restrict X, int len)
{
   int j;
   for (j=len-1;j>=0;j--)
   {
      /* Scale once and store twice. For j==0, X[2*j] is the same element
         as X[j], so re-reading X[j] after the first store would scale the
         sample a second time and leave X[1] at half amplitude. */
      const celt_norm_t scaled = MULT16_16_Q15(QCONST16(.70711f,15),X[j]);
      X[2*j] = scaled;
      X[2*j+1] = scaled;
   }
}
/* Apply the per-band stereo mixing to one band of an interleaved spectrum.
 *
 * The two energies bank[bandID*C] and bank[bandID*C+1] are normalised by
 * their joint magnitude to give the weights g1 = left/norm and
 * g2 = dir*right/norm, and each sample pair (l, r) of the band becomes
 * (g1*l + g2*r, g1*r - g2*l).
 *
 * When stereo_mode[bandID] is non-zero:
 *   - dir > 0: the pairs are mixed as above, then intensity_band() is
 *     applied to the band;
 *   - dir < 0: the mixing is skipped and dup_band() is applied instead.
 *
 * m           Mode data (band edges and channel count)
 * X           Interleaved spectrum, modified in place
 * bank        Band energies, C entries per band
 * stereo_mode Per-band flag selecting the intensity path
 * bandID      Index of the band to process
 * dir         Sign applied to the second weight; also selects the
 *             intensity_band (dir>0) or dup_band (dir<0) path
 */
static void stereo_band_mix(const CELTMode *m, celt_norm_t *X, const celt_ener_t *bank, const int *stereo_mode, int bandID, int dir)
{
   const celt_int16_t *eBands = m->eBands;
   const int C = CHANNELS(m);
   const int start = eBands[bandID];
   const int end = eBands[bandID+1];
   const int intensity = stereo_mode[bandID];
   int k;
   celt_word16_t e_left, e_right;
   celt_word16_t g1, g2;
   celt_word16_t mag;
#ifdef FIXED_POINT
   /* Bring the larger energy into 16-bit range before squaring. */
   int shift = celt_zlog2(MAX32(bank[bandID*C], bank[bandID*C+1]))-13;
#endif
   e_left = VSHR32(bank[bandID*C],shift);
   e_right = VSHR32(bank[bandID*C+1],shift);
   mag = EPSILON + celt_sqrt(EPSILON+MULT16_16(e_left,e_left)+MULT16_16(e_right,e_right));
   g1 = DIV32_16(SHL32(EXTEND32(e_left),14),mag);
   g2 = dir*DIV32_16(SHL32(EXTEND32(e_right),14),mag);

   if (intensity && dir<0)
   {
      dup_band(X+C*start, end-start);
   } else {
      for (k=start;k<end;k++)
      {
         celt_norm_t *pair = X+k*C;
         /* Read both inputs before either output is written. */
         const celt_norm_t l = pair[0];
         const celt_norm_t r = pair[1];
         pair[0] = MULT16_16_Q14(g1,l) + MULT16_16_Q14(g2,r);
         pair[1] = MULT16_16_Q14(g1,r) - MULT16_16_Q14(g2,l);
      }
   }

   if (intensity && dir>0)
      intensity_band(X+C*start, end-start);
}
/* Fill in the per-band stereo mode flags.
 *
 * The last five of the len entries are set to 1 and every earlier entry
 * to 0 (all entries become 1 when len is five or fewer). The m and X
 * arguments are not consulted.
 */
void stereo_decision(const CELTMode *m, celt_norm_t * restrict X, int *stereo_mode, int len)
{
   int band;
   const int first_flagged = len-5;
   for (band=0;band<len;band++)
      stereo_mode[band] = (band>=first_flagged) ? 1 : 0;
}
/* Quantisation of the residual */
void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const int *stereo_mode, int total_bits, ec_enc *enc)
void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_enc *enc)
{
int i, j, bits;
const celt_int16_t * restrict eBands = m->eBands;
@ -336,15 +416,20 @@ void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, ce
if (q > 0)
{
/*int nb_rotations = q <= 2*C ? 2*C/q : 0;
if (nb_rotations != 0)
int ch=C;
if (C==2 && stereo_mode[i]==1)
ch = 1;
if (C==2)
{
exp_rotation(P+C*eBands[i], C*(eBands[i+1]-eBands[i]), -1, C, nb_rotations);
exp_rotation(X+C*eBands[i], C*(eBands[i+1]-eBands[i]), -1, C, nb_rotations);
}*/
alg_quant(X+C*eBands[i], W+C*eBands[i], C*(eBands[i+1]-eBands[i]), q, P+C*eBands[i], enc);
/*if (nb_rotations != 0)
exp_rotation(X+C*eBands[i], C*(eBands[i+1]-eBands[i]), 1, C, nb_rotations);*/
stereo_band_mix(m, X, bandE, stereo_mode, i, 1);
stereo_band_mix(m, P, bandE, stereo_mode, i, 1);
}
alg_quant(X+C*eBands[i], W+C*eBands[i], ch*(eBands[i+1]-eBands[i]), q, P+C*eBands[i], enc);
if (C==2)
stereo_band_mix(m, X, bandE, stereo_mode, i, -1);
} else {
for (j=C*eBands[i];j<C*eBands[i+1];j++)
X[j] = P[j];
}
for (j=C*eBands[i];j<C*eBands[i+1];j++)
norm[j] = MULT16_16_Q15(n,X[j]);
@ -353,7 +438,7 @@ void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, ce
}
/* Decoding of the residual */
void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const int *stereo_mode, int total_bits, ec_dec *dec)
void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_dec *dec)
{
int i, j, bits;
const celt_int16_t * restrict eBands = m->eBands;
@ -394,12 +479,17 @@ void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P,
if (q > 0)
{
/*int nb_rotations = q <= 2*C ? 2*C/q : 0;
if (nb_rotations != 0)
exp_rotation(P+C*eBands[i], C*(eBands[i+1]-eBands[i]), -1, C, nb_rotations);*/
alg_unquant(X+C*eBands[i], C*(eBands[i+1]-eBands[i]), q, P+C*eBands[i], dec);
/*if (nb_rotations != 0)
exp_rotation(X+C*eBands[i], C*(eBands[i+1]-eBands[i]), 1, C, nb_rotations);*/
int ch=C;
if (C==2 && stereo_mode[i]==1)
ch = 1;
if (C==2)
stereo_band_mix(m, P, bandE, stereo_mode, i, 1);
alg_unquant(X+C*eBands[i], ch*(eBands[i+1]-eBands[i]), q, P+C*eBands[i], dec);
if (C==2)
stereo_band_mix(m, X, bandE, stereo_mode, i, -1);
} else {
for (j=C*eBands[i];j<C*eBands[i+1];j++)
X[j] = P[j];
}
for (j=C*eBands[i];j<C*eBands[i+1];j++)
norm[j] = MULT16_16_Q15(n,X[j]);
@ -407,44 +497,3 @@ void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P,
RESTORE_STACK;
}
#ifndef DISABLE_STEREO
/* Fill in the per-band stereo mode flags.
 *
 * Every one of the len entries is set to 0. The m and X arguments are
 * not consulted.
 */
void stereo_decision(const CELTMode *m, celt_norm_t * restrict X, int *stereo_mode, int len)
{
   int band;
   for (band=0;band<len;band++)
      stereo_mode[band] = 0;
}
/* Apply the stereo mixing to every band of an interleaved spectrum.
 *
 * For each band, the two energies bank[band*C] and bank[band*C+1] are
 * normalised by their joint magnitude to give the weights
 * g1 = left/norm and g2 = dir*right/norm, and each sample pair (l, r) of
 * the band becomes (g1*l + g2*r, g1*r - g2*l).
 *
 * m           Mode data (band edges, band count and channel count)
 * X           Interleaved spectrum, modified in place
 * bank        Band energies, C entries per band
 * stereo_mode Not consulted by this function
 * dir         Sign applied to the second weight
 */
void stereo_mix(const CELTMode *m, celt_norm_t *X, const celt_ener_t *bank, const int *stereo_mode, int dir)
{
   int band;
   const celt_int16_t *eBands = m->eBands;
   const int C = CHANNELS(m);
   for (band=0;band<m->nbEBands;band++)
   {
      int k;
      celt_word16_t e_left, e_right;
      celt_word16_t g1, g2;
      celt_word16_t mag;
#ifdef FIXED_POINT
      /* Bring the larger energy into 16-bit range before squaring. */
      int shift = celt_zlog2(MAX32(bank[band*C], bank[band*C+1]))-13;
#endif
      e_left = VSHR32(bank[band*C],shift);
      e_right = VSHR32(bank[band*C+1],shift);
      mag = EPSILON + celt_sqrt(EPSILON+MULT16_16(e_left,e_left)+MULT16_16(e_right,e_right));
      g1 = DIV32_16(SHL32(EXTEND32(e_left),14),mag);
      g2 = dir*DIV32_16(SHL32(EXTEND32(e_right),14),mag);
      for (k=eBands[band];k<eBands[band+1];k++)
      {
         /* Read both inputs before either output is written. */
         const celt_norm_t l = X[k*C];
         const celt_norm_t r = X[k*C+1];
         X[k*C] = MULT16_16_Q14(g1,l) + MULT16_16_Q14(g2,r);
         X[k*C+1] = MULT16_16_Q14(g1,r) - MULT16_16_Q14(g2,l);
      }
   }
}
#endif

View file

@ -86,7 +86,7 @@ void pitch_quant_bands(const CELTMode *m, celt_norm_t * restrict P, const celt_p
* @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
* @param enc Entropy encoder
*/
void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const int *stereo_mode, int total_bits, ec_enc *enc);
void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_enc *enc);
/** Decoding of the residual spectrum
* @param m Mode data
@ -95,10 +95,8 @@ void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, ce
* @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
* @param dec Entropy decoder
*/
void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const int *stereo_mode, int total_bits, ec_dec *dec);
void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_dec *dec);
void stereo_decision(const CELTMode *m, celt_norm_t * restrict X, int *stereo_mode, int len);
void stereo_mix(const CELTMode *m, celt_norm_t *X, const celt_ener_t *bank, const int *stereo_mode, int dir);
#endif /* BANDS_H */

View file

@ -325,26 +325,17 @@ int EXPORT celt_encode(CELTEncoder * restrict st, celt_int16_t * restrict pcm, u
quant_energy(st->mode, bandE, st->oldBandE, 20+nbCompressedBytes*8/5, st->mode->prob, &st->enc);
ALLOC(stereo_mode, st->mode->nbEBands, int);
if (C==2)
{
stereo_decision(st->mode, X, stereo_mode, st->mode->nbEBands);
stereo_mix(st->mode, X, bandE, stereo_mode, 1);
stereo_mix(st->mode, P, bandE, stereo_mode, 1);
}
pitch_quant_bands(st->mode, P, gains);
/*for (i=0;i<B*N;i++) printf("%f ",P[i]);printf("\n");*/
/* Compute residual that we're going to encode */
for (i=0;i<C*st->mode->eBands[st->mode->nbEBands];i++)
X[i] -= P[i];
/* Residual quantisation */
quant_bands(st->mode, X, P, NULL, stereo_mode, nbCompressedBytes*8, &st->enc);
quant_bands(st->mode, X, P, NULL, bandE, stereo_mode, nbCompressedBytes*8, &st->enc);
if (C==2)
{
stereo_mix(st->mode, X, bandE, stereo_mode, -1);
renormalise_bands(st->mode, X);
}
/* Synthesis */
@ -606,20 +597,15 @@ int EXPORT celt_decode(CELTDecoder * restrict st, unsigned char *data, int len,
}
ALLOC(stereo_mode, st->mode->nbEBands, int);
if (C==2)
{
stereo_decision(st->mode, X, stereo_mode, st->mode->nbEBands);
stereo_mix(st->mode, P, bandE, stereo_mode, 1);
}
/* Apply pitch gains */
pitch_quant_bands(st->mode, P, gains);
/* Decode fixed codebook and merge with pitch */
unquant_bands(st->mode, X, P, stereo_mode, len*8, &dec);
unquant_bands(st->mode, X, P, bandE, stereo_mode, len*8, &dec);
if (C==2)
{
stereo_mix(st->mode, X, bandE, stereo_mode, -1);
renormalise_bands(st->mode, X);
}
/* Synthesis */

View file

@ -156,6 +156,7 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
fprintf(file, "%d,\t/* nbAllocVectors */\n", mode->nbAllocVectors);
fprintf(file, "allocVectors%d_%d,\t/* allocVectors */\n", mode->Fs, mode->mdctSize);
fprintf(file, "allocCache%d_%d_%d,\t/* bits */\n", mode->Fs, mode->mdctSize, mode->nbChannels);
fprintf(file, "0,\t/* bits_stereo */\n");
fprintf(file, "{%d, 0, 0},\t/* mdct */\n", 2*mode->mdctSize);
fprintf(file, "0,\t/* fft */\n");
fprintf(file, "window%d,\t/* window */\n", mode->overlap);

View file

@ -364,8 +364,9 @@ CELTMode EXPORT *celt_mode_create(celt_int32_t Fs, int channels, int frame_size,
#endif
mode->window = window;
mode->bits = (const celt_int16_t **)compute_alloc_cache(mode, mode->nbChannels);
mode->bits = (const celt_int16_t **)compute_alloc_cache(mode, 1);
mode->bits_stereo = NULL;
#ifndef SHORTCUTS
psydecay_init(&mode->psy, MAX_PERIOD/2, mode->Fs);
#endif
@ -379,6 +380,9 @@ CELTMode EXPORT *celt_mode_create(celt_int32_t Fs, int channels, int frame_size,
mode->prob = quant_prob_alloc(mode);
compute_energy_allocation_table(mode);
if (mode->nbChannels>=2)
mode->bits_stereo = (const celt_int16_t **)compute_alloc_cache(mode, mode->nbChannels);
if (error)
*error = CELT_OK;
return mode;

View file

@ -89,6 +89,8 @@ struct CELTMode {
const celt_int16_t * const *bits; /**< Cache for pulses->bits mapping in each band */
const celt_int16_t * const *bits_stereo; /**< Cache for pulses->bits mapping in each band */
/* Stuff that could go in the {en,de}coder, but we save space this way */
mdct_lookup mdct;
kiss_fftr_cfg fft;

View file

@ -105,7 +105,6 @@ celt_int16_t **compute_alloc_cache(CELTMode *m, int C)
bits = celt_alloc(m->nbEBands*sizeof(celt_int16_t*));
C = m->nbChannels;
prevN = -1;
for (i=0;i<m->nbEBands;i++)
{
@ -270,6 +269,8 @@ int compute_allocation(const CELTMode *m, int *offsets, const int *stereo_mode,
for (i=0;i<len;i++)
{
if (stereo_mode[i]==0)
cache[i] = m->bits_stereo[i];
else
cache[i] = m->bits[i];
}
} else {

View file

@ -118,6 +118,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
sum = 0;
j=0; do {
X[j] -= P[j];
if (X[j]>0)
signx[j]=1;
else
@ -307,14 +308,6 @@ void intra_prediction(const CELTMode *m, celt_norm_t * restrict x, celt_mask_t *
pred_gain = s*MULT16_16_Q15(pred_gain,celt_rcp(SHL32(celt_sqrt(E),9)));
for (j=0;j<C*N;j++)
P[j] = PSHR32(MULT16_16(pred_gain, P[j]),8);
if (K>0)
{
for (j=0;j<C*N;j++)
x[j] -= P[j];
} else {
for (j=0;j<C*N;j++)
x[j] = P[j];
}
}
void intra_unquant(const CELTMode *m, celt_norm_t *x, int N, int K, celt_norm_t *Y, celt_norm_t * restrict P, int N0, int Nmax, ec_dec *dec)
@ -353,11 +346,6 @@ void intra_unquant(const CELTMode *m, celt_norm_t *x, int N, int K, celt_norm_t
pred_gain = s*MULT16_16_Q15(pred_gain,celt_rcp(SHL32(celt_sqrt(E),9)));
for (j=0;j<C*N;j++)
P[j] = PSHR32(MULT16_16(pred_gain, P[j]),8);
if (K==0)
{
for (j=0;j<C*N;j++)
x[j] = P[j];
}
}
void intra_fold(const CELTMode *m, celt_norm_t *x, int N, celt_norm_t *Y, celt_norm_t * restrict P, int N0, int Nmax)
@ -388,7 +376,5 @@ void intra_fold(const CELTMode *m, celt_norm_t *x, int N, celt_norm_t *Y, celt_n
g = celt_rcp(SHL32(celt_sqrt(E),9));
for (j=0;j<C*N;j++)
P[j] = PSHR32(MULT16_16(g, P[j]),8);
for (j=0;j<C*N;j++)
x[j] = P[j];
}