Energy quantization tuning.

The prediction coefficient now depends on the frame size. Also, the PDFs and means have been adjusted and better quantized. This breaks bitstream compatibility, of course.
2010-07-23 16:10:36 -04:00 · 2010-07-23 16:10:36 -04:00 · ca6533cd88
commit ca6533cd88
parent 2ed05631f8
6 changed files with 28 additions and 30 deletions
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@ -986,7 +986,7 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
 #else
   max_decay = .125*nbAvailableBytes;
 #endif
-   quant_coarse_energy(st->mode, st->start, st->end, bandLogE, st->oldBandE, nbCompressedBytes*8, intra_ener, st->mode->prob, error, enc, C, max_decay);
+   quant_coarse_energy(st->mode, st->start, st->end, bandLogE, st->oldBandE, nbCompressedBytes*8, intra_ener, st->mode->prob, error, enc, C, LM, max_decay);
   /* Variable bitrate */
   if (vbr_rate>0)
   {
@ -1800,7 +1800,7 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da
   ALLOC(fine_quant, st->mode->nbEBands, int);
   /* Get band energies */
-   unquant_coarse_energy(st->mode, st->start, st->end, bandE, st->oldBandE, intra_ener, st->mode->prob, dec, C);
+   unquant_coarse_energy(st->mode, st->start, st->end, bandE, st->oldBandE, intra_ener, st->mode->prob, dec, C, LM);
   ALLOC(tf_res, st->mode->nbEBands, int);
   tf_decode(st->start, st->end, C, isTransient, tf_res, nbAvailableBytes, LM, dec);
--- a/libcelt/dump_modes.c
+++ b/libcelt/dump_modes.c
@ -152,7 +152,6 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
      fprintf(file, "%d,\t/* nbEBands */\n", mode->nbEBands);
      fprintf(file, "%d,\t/* pitchEnd */\n", mode->pitchEnd);
      fprintf(file, "eBands%d_%d,\t/* eBands */\n", mode->Fs, mdctSize);
-      fprintf(file, WORD16 ",\t/* ePredCoef */\n", mode->ePredCoef);
      fprintf(file, "%d,\t/* nbAllocVectors */\n", mode->nbAllocVectors);
      fprintf(file, "allocVectors%d_%d,\t/* allocVectors */\n", mode->Fs, mdctSize);
      fprintf(file, "NULL,\t/* bits */\n");
--- a/libcelt/modes.c
+++ b/libcelt/modes.c
@ -334,7 +334,6 @@ CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error)
      goto failure;
   mode->marker_start = MODEPARTIAL;
   mode->Fs = Fs;
-   mode->ePredCoef = QCONST16(.8f,15);
   /* Pre/de-emphasis depends on sampling rate. The "standard" pre-emphasis
      is defined as A(z) = 1 - 0.85*z^-1 at 48 kHz. Other rates should
--- a/libcelt/modes.h
+++ b/libcelt/modes.h
@ -44,7 +44,7 @@
 #define MAX_CONFIG_SIZES 5
-#define CELT_BITSTREAM_VERSION 0x8000000c
+#define CELT_BITSTREAM_VERSION 0x8000000d
 #ifdef STATIC_MODES
 #include "static_modes.h"
@ -52,14 +52,6 @@
 #define MAX_PERIOD 1024
-#ifndef MCHANNELS
-# ifdef DISABLE_STEREO
-#  define MCHANNELS(mode) (1)
-# else
-#  define MCHANNELS(mode) ((mode)->nbChannels)
-# endif
-#endif
 #ifndef CHANNELS
 # ifdef DISABLE_STEREO
 #  define CHANNELS(_C) (1)
@ -90,8 +82,6 @@ struct CELTMode {
   celt_word16    preemph[4];
   const celt_int16   *eBands;   /**< Definition for each "pseudo-critical band" */
-   celt_word16 ePredCoef;/**< Prediction coefficient for the energy encoding */
   int          nbAllocVectors; /**< Number of lines in the matrix below */
   const unsigned char   *allocVectors;   /**< Number of bits in each band for several rates */
--- a/libcelt/quant_bands.c
+++ b/libcelt/quant_bands.c
@ -42,9 +42,16 @@
 #include "mathops.h"
 #include "stack_alloc.h"
-#define E_MEANS_SIZE (5)
+#define E_MEANS_SIZE (3)
-const celt_word16 eMeans[E_MEANS_SIZE] = {QCONST16(7.5f,DB_SHIFT), -QCONST16(1.33f,DB_SHIFT), -QCONST16(2.f,DB_SHIFT), -QCONST16(0.42f,DB_SHIFT), QCONST16(0.17f,DB_SHIFT)};
+static const celt_word16 eMeans[E_MEANS_SIZE] = {QCONST16(7.5f,DB_SHIFT), -QCONST16(1.f,DB_SHIFT), -QCONST16(.5f,DB_SHIFT)};
+/* prediction coefficients: 0.9, 0.8, 0.65, 0.5 */
+#ifdef FIXED_POINT
+static const celt_word16 pred_coef[4] = {29440, 26112, 21248, 16384};
+#else
+static const celt_word16 pred_coef[4] = {29440/32768., 26112/32768., 21248/32768., 16384/32768.};
+#endif
 /* FIXME: Implement for stereo */
 int intra_decision(celt_word16 *eBands, celt_word16 *oldEBands, int len)
@ -68,12 +75,12 @@ int *quant_prob_alloc(const CELTMode *m)
     return NULL;
   for (i=0;i<m->nbEBands;i++)
   {
-      prob[2*i] = 6000-i*200;
+      prob[2*i] = 7000-i*200;
      prob[2*i+1] = ec_laplace_get_start_freq(prob[2*i]);
   }
   for (i=0;i<m->nbEBands;i++)
   {
-      prob[2*m->nbEBands+2*i] = 9000-i*240;
+      prob[2*m->nbEBands+2*i] = 9000-i*220;
      prob[2*m->nbEBands+2*i+1] = ec_laplace_get_start_freq(prob[2*m->nbEBands+2*i]);
   }
   return prob;
@ -84,22 +91,23 @@ void quant_prob_free(int *freq)
   celt_free(freq);
 }
-void quant_coarse_energy(const CELTMode *m, int start, int end, const celt_word16 *eBands, celt_word16 *oldEBands, int budget, int intra, int *prob, celt_word16 *error, ec_enc *enc, int _C, celt_word16 max_decay)
+void quant_coarse_energy(const CELTMode *m, int start, int end, const celt_word16 *eBands, celt_word16 *oldEBands, int budget, int intra, int *prob, celt_word16 *error, ec_enc *enc, int _C, int LM, celt_word16 max_decay)
 {
   int i, c;
   celt_word32 prev[2] = {0,0};
-   celt_word16 coef = m->ePredCoef;
+   celt_word16 coef;
   celt_word16 beta;
   const int C = CHANNELS(_C);
+   coef = pred_coef[LM];
   if (intra)
   {
      coef = 0;
      prob += 2*m->nbEBands;
   }
-   /* The .8 is a heuristic */
+   /* No theoretical justification for this, it just works */
-   beta = MULT16_16_P15(QCONST16(.8f,15),coef);
+   beta = MULT16_16_P15(coef,coef);
   /* Encode at a fixed coarse resolution */
   for (i=start;i<end;i++)
   {
@ -228,21 +236,23 @@ void quant_energy_finalise(const CELTMode *m, int start, int end, celt_ener *eBa
   } while (++c < C);
 }
-void unquant_coarse_energy(const CELTMode *m, int start, int end, celt_ener *eBands, celt_word16 *oldEBands, int intra, int *prob, ec_dec *dec, int _C)
+void unquant_coarse_energy(const CELTMode *m, int start, int end, celt_ener *eBands, celt_word16 *oldEBands, int intra, int *prob, ec_dec *dec, int _C, int LM)
 {
   int i, c;
   celt_word32 prev[2] = {0, 0};
-   celt_word16 coef = m->ePredCoef;
+   celt_word16 coef;
   celt_word16 beta;
   const int C = CHANNELS(_C);
+   coef = pred_coef[LM];
   if (intra)
   {
      coef = 0;
      prob += 2*m->nbEBands;
   }
-   /* The .8 is a heuristic */
+   /* No theoretical justification for this, it just works */
-   beta = MULT16_16_P15(QCONST16(.8f,15),coef);
+   beta = MULT16_16_P15(coef,coef);
   /* Decode at a fixed coarse resolution */
   for (i=start;i<end;i++)
--- a/libcelt/quant_bands.h
+++ b/libcelt/quant_bands.h
@ -56,13 +56,13 @@ void compute_fine_allocation(const CELTMode *m, int *bits, int budget);
 int intra_decision(celt_word16 *eBands, celt_word16 *oldEBands, int len);
-void quant_coarse_energy(const CELTMode *m, int start, int end, const celt_word16 *eBands, celt_word16 *oldEBands, int budget, int intra, int *prob, celt_word16 *error, ec_enc *enc, int _C, celt_word16 max_decay);
+void quant_coarse_energy(const CELTMode *m, int start, int end, const celt_word16 *eBands, celt_word16 *oldEBands, int budget, int intra, int *prob, celt_word16 *error, ec_enc *enc, int _C, int LM, celt_word16 max_decay);
 void quant_fine_energy(const CELTMode *m, int start, int end, celt_ener *eBands, celt_word16 *oldEBands, celt_word16 *error, int *fine_quant, ec_enc *enc, int _C);
 void quant_energy_finalise(const CELTMode *m, int start, int end, celt_ener *eBands, celt_word16 *oldEBands, celt_word16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int _C);
-void unquant_coarse_energy(const CELTMode *m, int start, int end, celt_ener *eBands, celt_word16 *oldEBands, int intra, int *prob, ec_dec *dec, int _C);
+void unquant_coarse_energy(const CELTMode *m, int start, int end, celt_ener *eBands, celt_word16 *oldEBands, int intra, int *prob, ec_dec *dec, int _C, int LM);
 void unquant_fine_energy(const CELTMode *m, int start, int end, celt_ener *eBands, celt_word16 *oldEBands, int *fine_quant, ec_dec *dec, int _C);