Preserving 24-bit accuracy for fixed-point decoder

Convert to 16 bits only at the very end
2025-05-14 15:38:32 +00:00 · 2024-06-08 21:46:22 -04:00 · 2024-06-08 21:46:22 -04:00 · a4854afac8
commit a4854afac8
parent 4141c4d529
14 changed files with 212 additions and 157 deletions
--- a/celt/arch.h
+++ b/celt/arch.h
@ -106,6 +106,14 @@ void celt_fatal(const char *str, const char *file, int line)
 #define UADD32(a,b) ((a)+(b))
 #define USUB32(a,b) ((a)-(b))

+/* Throughout the code, we use the following scaling for signals:
+   FLOAT: used for float API, normalized to +/-1.
+   INT16: used for 16-bit API, normalized to +/- 32768
+   RES: internal Opus resolution, defined as +/-1. in float builds, or either 16-bit or 24-bit int for fixed-point builds
+   SIG: internal CELT resolution: defined as +/- 32768. in float builds, or Q27 in fixed-point builds (int16 shifted by 12)
+*/
+
+
 /* Set this if opus_int64 is a native type of the CPU. */
 /* Assume that all LP64 architectures have fast 64-bit types; also x86_64
   (which can be ILP32 for x32) and Win64 (which is LLP64). */
@ -127,6 +135,26 @@ typedef opus_val32 celt_sig;
 typedef opus_val16 celt_norm;
 typedef opus_val32 celt_ener;

+#ifdef ENABLE_RES24 /* 24-bit opus_res: an int16-scaled sample shifted left by RES_SHIFT bits, held in opus_val32 */
+typedef opus_val32 opus_res;
+#define RES_SHIFT 8
+#define SCALEIN(a)      (a)
+#define SIG2RES(a)      PSHR32(a, SIG_SHIFT-RES_SHIFT) /* SIG -> RES: drop all but RES_SHIFT of the SIG_SHIFT extra bits */
+#define RES2INT16(a)    SAT16(PSHR32(a, RES_SHIFT)) /* RES -> INT16 scale */
+#define RES2FLOAT(a)    ((1.f/32768.f/256.f)*(a)) /* RES -> FLOAT; all-float constant so the multiply is not promoted to double */
+#define INT16TORES(a)   SHL32(EXTEND32(a), RES_SHIFT) /* INT16 -> RES */
+#define ADD_RES(a, b)   ADD32(a, b)
+#else /* 16-bit opus_res: RES_SHIFT is 0, so RES<->INT16 conversions pass the value through */
+typedef opus_val16 opus_res;
+#define RES_SHIFT 0
+#define SCALEIN(a)      (a)
+#define SIG2RES(a)      SIG2WORD16(a)
+#define RES2INT16(a)    (a)
+#define RES2FLOAT(a)    ((1.f/32768.f)*(a))
+#define INT16TORES(a)   (a)
+#define ADD_RES(a, b)   SAT16(ADD32((a), (b))) /* no trailing ';': the macro must expand to an expression, like the other ADD_RES definitions */
+#endif
+
 #define celt_isnan(x) 0

 #define Q15ONE 32767
@ -150,8 +178,6 @@ typedef opus_val32 celt_ener;
 #define VERY_LARGE16 ((opus_val16)32767)
 #define Q15_ONE ((opus_val16)32767)

-#define SCALEIN(a)      (a)
-#define SCALEOUT(a)     (a)

 #define ABS16(x) ((x) < 0 ? (-(x)) : (x))
 #define ABS32(x) ((x) < 0 ? (-(x)) : (x))
@ -192,6 +218,8 @@ typedef float celt_sig;
 typedef float celt_norm;
 typedef float celt_ener;

+typedef float opus_res;
+
 #ifdef FLOAT_APPROX
 /* This code should reliably detect NaN/inf even when -ffast-math is used.
   Assumes IEEE 754 format. */
@ -279,9 +307,12 @@ static OPUS_INLINE int celt_isnan(float x)
 #define DIV32(a,b)     (((opus_val32)(a))/(opus_val32)(b))

 #define SCALEIN(a)      ((a)*CELT_SIG_SCALE)
-#define SCALEOUT(a)     ((a)*(1/CELT_SIG_SCALE))

-#define SIG2WORD16(x) (x)
+#define SIG2RES(a)      ((1/CELT_SIG_SCALE)*(a))
+#define RES2INT16(a)    FLOAT2INT16(a)
+#define RES2FLOAT(a)    (a)
+#define INT16TORES(a)   ((a)*(1/CELT_SIG_SCALE))
+#define ADD_RES(a, b)   ADD32(a, b)

 #endif /* !FIXED_POINT */

--- a/celt/celt.h
+++ b/celt/celt.h
@ -154,14 +154,14 @@ int celt_decoder_get_size(int channels);
 int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);

 int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
+      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
 #ifdef ENABLE_DEEP_PLC
      ,LPCNetPLCState *lpcnet
 #endif
      );

 int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
+      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);

 #define celt_encoder_ctl opus_custom_encoder_ctl
 #define celt_decoder_ctl opus_custom_decoder_ctl
@ -239,7 +239,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
 void init_caps(const CELTMode *m,int *cap,int LM,int C);

 #ifdef RESYNTH
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, int accum);
+void deemphasis(celt_sig *in[], opus_res *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, int accum);
 void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
      opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
      int LM, int downsample, int silence, int arch);
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@ -246,7 +246,7 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
 /* Special case for stereo with no downsampling and no accumulation. This is
   quite common and we can make it faster by processing both channels in the
   same loop, reducing overhead due to the dependency loop in the IIR filter. */
-static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
+static void deemphasis_stereo_simple(celt_sig *in[], opus_res *pcm, int N, const opus_val16 coef0,
      celt_sig *mem)
 {
   celt_sig * OPUS_RESTRICT x0;
@ -265,8 +265,8 @@ static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, con
      tmp1 = SATURATE(x1[j] + VERY_SMALL + m1, SIG_SAT);
      m0 = MULT16_32_Q15(coef0, tmp0);
      m1 = MULT16_32_Q15(coef0, tmp1);
-      pcm[2*j  ] = SCALEOUT(SIG2WORD16(tmp0));
-      pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
+      pcm[2*j  ] = SIG2RES(tmp0);
+      pcm[2*j+1] = SIG2RES(tmp1);
   }
   mem[0] = m0;
   mem[1] = m1;
@ -276,7 +276,7 @@ static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, con
 #ifndef RESYNTH
 static
 #endif
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
+void deemphasis(celt_sig *in[], opus_res *pcm, int N, int C, int downsample, const opus_val16 *coef,
      celt_sig *mem, int accum)
 {
   int c;
@ -292,10 +292,6 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
      deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
      return;
   }
-#endif
-#ifndef FIXED_POINT
-   (void)accum;
-   celt_assert(accum==0);
 #endif
   ALLOC(scratch, N, celt_sig);
   coef0 = coef[0];
@ -303,7 +299,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
   c=0; do {
      int j;
      celt_sig * OPUS_RESTRICT x;
-      opus_val16  * OPUS_RESTRICT y;
+      opus_res  * OPUS_RESTRICT y;
      celt_sig m = mem[c];
      x =in[c];
      y = pcm+c;
@ -335,23 +331,21 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
         apply_downsampling=1;
      } else {
         /* Shortcut for the standard (non-custom modes) case */
-#ifdef FIXED_POINT
         if (accum)
         {
            for (j=0;j<N;j++)
            {
               celt_sig tmp = SATURATE(x[j] + m + VERY_SMALL, SIG_SAT);
               m = MULT16_32_Q15(coef0, tmp);
-               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
+               y[j*C] = ADD_RES(y[j*C], SIG2RES(tmp));
            }
         } else
-#endif
         {
            for (j=0;j<N;j++)
            {
               celt_sig tmp = SATURATE(x[j] + VERY_SMALL + m, SIG_SAT);
               m = MULT16_32_Q15(coef0, tmp);
-               y[j*C] = SCALEOUT(SIG2WORD16(tmp));
+               y[j*C] = SIG2RES(tmp);
            }
         }
      }
@ -360,16 +354,14 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
      if (apply_downsampling)
      {
         /* Perform down-sampling */
-#ifdef FIXED_POINT
         if (accum)
         {
            for (j=0;j<Nd;j++)
-               y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
+               y[j*C] = ADD_RES(y[j*C], SIG2RES(scratch[j*downsample]));
         } else
-#endif
         {
            for (j=0;j<Nd;j++)
-               y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
+               y[j*C] = SIG2RES(scratch[j*downsample]);
         }
      }
   } while (++c<C);
@ -968,7 +960,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM
 }

 int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
+      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
 #ifdef ENABLE_DEEP_PLC
      ,LPCNetPLCState *lpcnet
 #endif
@ -1369,7 +1361,7 @@ int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char
 }

 int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
-      int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
+      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
 {
   return celt_decode_with_ec_dred(st, data, len, pcm, frame_size, dec, accum
 #ifdef ENABLE_DEEP_PLC
@ -1381,16 +1373,11 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
 #ifdef CUSTOM_MODES

 #ifdef FIXED_POINT
+#ifdef ENABLE_RES24
 int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
-{
-   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
-}
-
-#ifndef DISABLE_FLOAT_API
-int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
 {
   int j, ret, C, N;
-   VARDECL(opus_int16, out);
+   VARDECL(opus_res, out);
   ALLOC_STACK;

   if (pcm==NULL)
@ -1399,11 +1386,40 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char
   C = st->channels;
   N = frame_size;

-   ALLOC(out, C*N, opus_int16);
+   ALLOC(out, C*N, opus_res);
+   ret = celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
+   if (ret>0)
+      for (j=0;j<C*ret;j++)
+         pcm[j]=RES2INT16(out[j]);
+
+   RESTORE_STACK;
+   return ret;
+}
+#else
+int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
+{ /* fixed-point build without ENABLE_RES24: decode directly into the caller's buffer, with no conversion pass */
+   return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); /* dec=NULL, accum=0 */
+}
+#endif
+
+#ifndef DISABLE_FLOAT_API
+int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
+{
+   int j, ret, C, N;
+   VARDECL(opus_res, out);
+   ALLOC_STACK;
+
+   if (pcm==NULL)
+      return OPUS_BAD_ARG;
+
+   C = st->channels;
+   N = frame_size;
+
+   ALLOC(out, C*N, opus_res);
   ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
   if (ret>0)
      for (j=0;j<C*ret;j++)
-         pcm[j]=out[j]*(1.f/32768.f);
+         pcm[j]=RES2FLOAT(out[j]);

   RESTORE_STACK;
   return ret;
@ -1434,7 +1450,7 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data

   if (ret>0)
      for (j=0;j<C*ret;j++)
-         pcm[j] = FLOAT2INT16 (out[j]);
+         pcm[j] = RES2INT16 (out[j]);

   RESTORE_STACK;
   return ret;
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@ -2591,7 +2591,7 @@ int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, i
   N=frame_size;
   ALLOC(in, C*N, celt_sig);
   for (j=0;j<C*N;j++) {
-     in[j] = SCALEOUT(pcm[j]);
+     in[j] = (1.0f/32768)*pcm[j];
   }

   ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL);
--- a/configure.ac
+++ b/configure.ac
@ -151,6 +151,14 @@ AS_IF([test "$enable_float_api" = "no"],[
  AC_DEFINE([DISABLE_FLOAT_API], [1], [Do not build the float API])
 ])

+AC_ARG_ENABLE([fixed-res24],
+	      [AS_HELP_STRING([--enable-fixed-res24], [Use 24-bit internal resolution for fixed-point implementation])],,
+    [enable_fixed_res24=no])
+
+AS_IF([test "$enable_fixed_res24" = "yes"],[
+  AC_DEFINE([ENABLE_RES24], [1], [24-bit internal resolution for fixed-point])
+])
+
 AC_ARG_ENABLE([custom-modes],
    [AS_HELP_STRING([--enable-custom-modes], [enable non-Opus modes, e.g. 44.1 kHz & 2^n frames])],,
    [enable_custom_modes=no])
--- a/silk/API.h
+++ b/silk/API.h
@ -129,7 +129,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
    opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
    opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
    ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
-    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
+    opus_res                        *samplesOut,        /* O    Decoded output speech vector                    */
    opus_int32                      *nSamplesOut,       /* O    Number of samples decoded                       */
 #ifdef ENABLE_DEEP_PLC
    LPCNetPLCState                  *lpcnet,
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@ -135,7 +135,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
    opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
    opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
    ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
-    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
+    opus_res                        *samplesOut,        /* O    Decoded output speech vector                    */
    opus_int32                      *nSamplesOut,       /* O    Number of samples decoded                       */
 #ifdef ENABLE_DEEP_PLC
    LPCNetPLCState                  *lpcnet,
@ -147,7 +147,6 @@ opus_int silk_Decode(                                   /* O    Returns error co
    opus_int32 nSamplesOutDec, LBRR_symbol;
    opus_int16 *samplesOut1_tmp[ 2 ];
    VARDECL( opus_int16, samplesOut1_tmp_storage1 );
-    VARDECL( opus_int16, samplesOut1_tmp_storage2 );
    VARDECL( opus_int16, samplesOut2_tmp );
    opus_int32 MS_pred_Q13[ 2 ] = { 0 };
    opus_int16 *resample_out_ptr;
@ -155,7 +154,6 @@ opus_int silk_Decode(                                   /* O    Returns error co
    silk_decoder_state *channel_state = psDec->channel_state;
    opus_int has_side;
    opus_int stereo_to_mono;
-    int delay_stack_alloc;
    SAVE_STACK;

    celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@ -312,19 +310,10 @@ opus_int silk_Decode(                                   /* O    Returns error co
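-    /* Check if the temp buffer fits into the output PCM buffer. If it fits,
-       we can delay allocating the temp buffer until after the SILK peak stack
-       usage. We need to use a < and not a <= because of the two extra samples. */
+    /* Always allocate the temp buffer for the decoded samples here: the output
+       PCM buffer is now opus_res and is no longer reused as int16 scratch.
+       Each channel's segment holds frame_length plus two extra samples. */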
-    delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
-          < decControl->API_sampleRate*decControl->nChannelsAPI;
-    ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
-           : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
+    ALLOC( samplesOut1_tmp_storage1, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
           opus_int16 );
-    if ( delay_stack_alloc )
-    {
-       samplesOut1_tmp[ 0 ] = samplesOut;
-       samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
-    } else {
    samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
    samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
-    }

    if( lostFlag == FLAG_DECODE_NORMAL ) {
        has_side = !decode_only_middle;
@ -384,23 +373,9 @@ opus_int silk_Decode(                                   /* O    Returns error co
    *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );

    /* Set up pointers to temp buffers */
-    ALLOC( samplesOut2_tmp,
-           decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
-    if( decControl->nChannelsAPI == 2 ) {
+    ALLOC( samplesOut2_tmp, *nSamplesOut, opus_int16 );
    resample_out_ptr = samplesOut2_tmp;
-    } else {
-        resample_out_ptr = samplesOut;
-    }

-    ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
-           ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
-           : ALLOC_NONE,
-           opus_int16 );
-    if ( delay_stack_alloc ) {
-       OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
-       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
-       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
-    }
    for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {

        /* Resample decoded signal to API_sampleRate */
@ -409,7 +384,11 @@ opus_int silk_Decode(                                   /* O    Returns error co
        /* Interleave if stereo output and stereo stream */
        if( decControl->nChannelsAPI == 2 ) {
            for( i = 0; i < *nSamplesOut; i++ ) {
-                samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
+                samplesOut[ n + 2 * i ] = INT16TORES(resample_out_ptr[ i ]);
+            }
+        } else {
+            for( i = 0; i < *nSamplesOut; i++ ) {
+                samplesOut[ i ] = INT16TORES(resample_out_ptr[ i ]);
            }
        }
    }
@ -422,7 +401,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
            ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );

            for( i = 0; i < *nSamplesOut; i++ ) {
-                samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
+                samplesOut[ 1 + 2 * i ] = INT16TORES(resample_out_ptr[ i ]);
            }
        } else {
            for( i = 0; i < *nSamplesOut; i++ ) {
--- a/src/mapping_matrix.c
+++ b/src/mapping_matrix.c
@ -118,7 +118,7 @@ void mapping_matrix_multiply_channel_in_float(

 void mapping_matrix_multiply_channel_out_float(
    const MappingMatrix *matrix,
-    const opus_val16 *input,
+    const opus_res *input,
    int input_row,
    int input_rows,
    float *output,
@ -137,11 +137,7 @@ void mapping_matrix_multiply_channel_out_float(

  for (i = 0; i < frame_size; i++)
  {
-#if defined(FIXED_POINT)
-    input_sample = (1/32768.f)*input[input_rows * i];
-#else
-    input_sample = input[input_rows * i];
-#endif
+    input_sample = RES2FLOAT(input[input_rows * i]);
    for (row = 0; row < output_rows; row++)
    {
      float tmp =
@ -195,7 +191,7 @@ void mapping_matrix_multiply_channel_in_short(

 void mapping_matrix_multiply_channel_out_short(
    const MappingMatrix *matrix,
-    const opus_val16 *input,
+    const opus_res *input,
    int input_row,
    int input_rows,
    opus_int16 *output,
@ -213,11 +209,7 @@ void mapping_matrix_multiply_channel_out_short(

  for (i = 0; i < frame_size; i++)
  {
-#if defined(FIXED_POINT)
-    input_sample = (opus_int32)input[input_rows * i];
-#else
-    input_sample = (opus_int32)FLOAT2INT16(input[input_rows * i]);
-#endif
+    input_sample = RES2INT16(input[input_rows * i]);
    for (row = 0; row < output_rows; row++)
    {
      opus_int32 tmp =
--- a/src/mapping_matrix.h
+++ b/src/mapping_matrix.h
@ -74,7 +74,7 @@ void mapping_matrix_multiply_channel_in_float(

 void mapping_matrix_multiply_channel_out_float(
    const MappingMatrix *matrix,
-    const opus_val16 *input,
+    const opus_res *input,
    int input_row,
    int input_rows,
    float *output,
@ -95,7 +95,7 @@ void mapping_matrix_multiply_channel_in_short(

 void mapping_matrix_multiply_channel_out_short(
    const MappingMatrix *matrix,
-    const opus_val16 *input,
+    const opus_res *input,
    int input_row,
    int input_rows,
    opus_int16 *output,
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@ -202,8 +202,26 @@ OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error)
   return st;
 }

-static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2,
-      opus_val16 *out, int overlap, int channels,
+#ifdef ENABLE_RES24
+static void smooth_fade(const opus_res *in1, const opus_res *in2,
+      opus_res *out, int overlap, int channels,
+      const opus_val16 *window, opus_int32 Fs) /* ENABLE_RES24 variant: blends in1 and in2 into out over `overlap` samples of each interleaved channel */
+{
+   int i, c;
+   int inc = 48000/Fs; /* window is read with a stride of 48000/Fs entries per output sample */
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<overlap;i++)
+      {
+         opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]); /* blend weight: the window value squared */
+         out[i*channels+c] = ADD32(MULT16_32_Q15(w,in2[i*channels+c]), /* in2 weighted by w ... */
+                                   MULT16_32_Q15(Q15ONE-w, in1[i*channels+c])); /* ... plus in1 weighted by Q15ONE-w; 16x32 multiplies because opus_res is opus_val32 in this build */
+      }
+   }
+}
+#else
+static void smooth_fade(const opus_res *in1, const opus_res *in2,
+      opus_res *out, int overlap, int channels,
      const opus_val16 *window, opus_int32 Fs)
 {
   int i, c;
@ -218,6 +236,7 @@ static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2,
      }
   }
 }
+#endif

 static int opus_packet_get_mode(const unsigned char *data)
 {
@ -235,22 +254,20 @@ static int opus_packet_get_mode(const unsigned char *data)
 }

 static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
-      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+      opus_int32 len, opus_res *pcm, int frame_size, int decode_fec)
 {
   void *silk_dec;
   CELTDecoder *celt_dec;
   int i, silk_ret=0, celt_ret=0;
   ec_dec dec;
   opus_int32 silk_frame_size;
-   int pcm_silk_size;
-   VARDECL(opus_int16, pcm_silk);
   int pcm_transition_silk_size;
-   VARDECL(opus_val16, pcm_transition_silk);
+   VARDECL(opus_res, pcm_transition_silk);
   int pcm_transition_celt_size;
-   VARDECL(opus_val16, pcm_transition_celt);
-   opus_val16 *pcm_transition=NULL;
+   VARDECL(opus_res, pcm_transition_celt);
+   opus_res *pcm_transition=NULL;
   int redundant_audio_size;
-   VARDECL(opus_val16, redundant_audio);
+   VARDECL(opus_res, redundant_audio);

   int audiosize;
   int mode;
@ -335,11 +352,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,

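-   /* In fixed-point, we can tell CELT to do the accumulation on top of the
-      SILK PCM buffer. This saves some stack space. */
+   /* Whenever SILK is active, we tell CELT to do the accumulation on top of
+      the SILK PCM buffer. This saves some stack space. */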
-#ifdef FIXED_POINT
-   celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10);
-#else
-   celt_accum = 0;
-#endif
+   celt_accum = (mode != MODE_CELT_ONLY);

   pcm_transition_silk_size = ALLOC_NONE;
   pcm_transition_celt_size = ALLOC_NONE;
@ -355,7 +368,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
      else
         pcm_transition_silk_size = F5*st->channels;
   }
-   ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16);
+   ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_res);
   if (transition && mode == MODE_CELT_ONLY)
   {
      pcm_transition = pcm_transition_celt;
@ -370,21 +383,22 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
      frame_size = audiosize;
   }

-   /* Don't allocate any memory when in CELT-only mode */
-   pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE;
-   ALLOC(pcm_silk, pcm_silk_size, opus_int16);
-
   /* SILK processing */
   if (mode != MODE_CELT_ONLY)
   {
      int lost_flag, decoded_samples;
-      opus_int16 *pcm_ptr;
-#ifdef FIXED_POINT
-      if (celt_accum)
-         pcm_ptr = pcm;
-      else
-#endif
+      opus_res *pcm_ptr;
+      int pcm_too_small;
+      int pcm_silk_size = ALLOC_NONE;
+      VARDECL(opus_res, pcm_silk);
+      pcm_too_small = (frame_size < F10);
+      if (pcm_too_small)
+         pcm_silk_size = F10*st->channels;
+      ALLOC(pcm_silk, pcm_silk_size, opus_res);
+      if (pcm_too_small)
         pcm_ptr = pcm_silk;
+      else
+         pcm_ptr = pcm;

      if (st->prev_mode==MODE_CELT_ONLY)
         silk_ResetDecoder( silk_dec );
@ -447,6 +461,9 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
        pcm_ptr += silk_frame_size * st->channels;
        decoded_samples += silk_frame_size;
      } while( decoded_samples < frame_size );
+     if (pcm_too_small) {
+        OPUS_COPY(pcm, pcm_silk, frame_size*st->channels);
+     }
   }

   start_band = 0;
@ -488,7 +505,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
      pcm_transition_silk_size=ALLOC_NONE;
   }

-   ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16);
+   ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_res);

   if (transition && mode != MODE_CELT_ONLY)
   {
@ -526,7 +543,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,

   /* Only allocation memory for redundancy if/when needed */
   redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE;
-   ALLOC(redundant_audio, redundant_audio_size, opus_val16);
+   ALLOC(redundant_audio, redundant_audio_size, opus_res);

   /* 5 ms redundant frame for CELT->SILK*/
   if (redundancy && celt_to_silk)
@ -574,17 +591,6 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
      }
   }

-   if (mode != MODE_CELT_ONLY && !celt_accum)
-   {
-#ifdef FIXED_POINT
-      for (i=0;i<frame_size*st->channels;i++)
-         pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i]));
-#else
-      for (i=0;i<frame_size*st->channels;i++)
-         pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]);
-#endif
-   }
-
   {
      const CELTMode *celt_mode;
      MUST_SUCCEED(celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode)));
@ -668,7 +674,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
 }

 int opus_decode_native(OpusDecoder *st, const unsigned char *data,
-      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec,
+      opus_int32 len, opus_res *pcm, int frame_size, int decode_fec,
      int self_delimited, opus_int32 *packet_offset, int soft_clip, const OpusDRED *dred, opus_int32 dred_offset)
 {
   int i, nb_samples;
@ -811,20 +817,11 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data,
 }

 #ifdef FIXED_POINT
-
+#ifdef ENABLE_RES24
 int opus_decode(OpusDecoder *st, const unsigned char *data,
      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
 {
-   if(frame_size<=0)
-      return OPUS_BAD_ARG;
-   return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
-}
-
-#ifndef DISABLE_FLOAT_API
-int opus_decode_float(OpusDecoder *st, const unsigned char *data,
-      opus_int32 len, float *pcm, int frame_size, int decode_fec)
-{
-   VARDECL(opus_int16, out);
+	   VARDECL(opus_res, out);
 	   int ret, i;
 	   int nb_samples;
 	   ALLOC_STACK;
@ -843,13 +840,57 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data,
 	         return OPUS_INVALID_PACKET;
 	   }
 	   celt_assert(st->channels == 1 || st->channels == 2);
-   ALLOC(out, frame_size*st->channels, opus_int16);
+	   ALLOC(out, frame_size*st->channels, opus_res);

 	   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
 	   if (ret > 0)
 	   {
 	      for (i=0;i<ret*st->channels;i++)
-         pcm[i] = (1.f/32768.f)*(out[i]);
+	         pcm[i] = RES2INT16(out[i]);
+	   }
+	   RESTORE_STACK;
+	   return ret;
+}
+#else
+int opus_decode(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
+{ /* fixed-point build without ENABLE_RES24: pcm is handed straight to opus_decode_native(), with no conversion pass */
+   if(frame_size<=0)
+      return OPUS_BAD_ARG; /* reject non-positive frame sizes before decoding */
+   return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
+}
+#endif
+
+#ifndef DISABLE_FLOAT_API
+int opus_decode_float(OpusDecoder *st, const unsigned char *data,
+      opus_int32 len, float *pcm, int frame_size, int decode_fec)
+{
+   VARDECL(opus_res, out);
+   int ret, i;
+   int nb_samples;
+   ALLOC_STACK;
+
+   if(frame_size<=0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   if (data != NULL && len > 0 && !decode_fec)
+   {
+      nb_samples = opus_decoder_get_nb_samples(st, data, len);
+      if (nb_samples>0)
+         frame_size = IMIN(frame_size, nb_samples);
+      else
+         return OPUS_INVALID_PACKET;
+   }
+   celt_assert(st->channels == 1 || st->channels == 2);
+   ALLOC(out, frame_size*st->channels, opus_res);
+
+   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
+   if (ret > 0)
+   {
+      for (i=0;i<ret*st->channels;i++)
+         pcm[i] = RES2FLOAT(out[i]);
   }
   RESTORE_STACK;
   return ret;
@ -887,7 +928,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
   if (ret > 0)
   {
      for (i=0;i<ret*st->channels;i++)
-         pcm[i] = FLOAT2INT16(out[i]);
+         pcm[i] = RES2INT16(out[i]);
   }
   RESTORE_STACK;
   return ret;
--- a/src/opus_multistream_decoder.c
+++ b/src/opus_multistream_decoder.c
@ -193,7 +193,7 @@ int opus_multistream_decode_native(
   int s, c;
   char *ptr;
   int do_plc=0;
-   VARDECL(opus_val16, buf);
+   VARDECL(opus_res, buf);
   ALLOC_STACK;

   VALIDATE_MS_DECODER(st);
@ -205,7 +205,7 @@ int opus_multistream_decode_native(
   /* Limit frame_size to avoid excessive stack allocations. */
   MUST_SUCCEED(opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs)));
   frame_size = IMIN(frame_size, Fs/25*3);
-   ALLOC(buf, 2*frame_size, opus_val16);
+   ALLOC(buf, 2*frame_size, opus_res);
   ptr = (char*)st + align(sizeof(OpusMSDecoder));
   coupled_size = opus_decoder_get_size(2);
   mono_size = opus_decoder_get_size(1);
@ -311,7 +311,7 @@ static void opus_copy_channel_out_float(
  void *dst,
  int dst_stride,
  int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
  int src_stride,
  int frame_size,
  void *user_data
@ -324,11 +324,7 @@ static void opus_copy_channel_out_float(
   if (src != NULL)
   {
      for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
-         float_dst[i*dst_stride+dst_channel] = (1/32768.f)*src[i*src_stride];
-#else
-         float_dst[i*dst_stride+dst_channel] = src[i*src_stride];
-#endif
+         float_dst[i*dst_stride+dst_channel] = RES2FLOAT(src[i*src_stride]);
   }
   else
   {
@ -342,7 +338,7 @@ static void opus_copy_channel_out_short(
  void *dst,
  int dst_stride,
  int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
  int src_stride,
  int frame_size,
  void *user_data
@ -355,11 +351,7 @@ static void opus_copy_channel_out_short(
   if (src != NULL)
   {
      for (i=0;i<frame_size;i++)
-#if defined(FIXED_POINT)
-         short_dst[i*dst_stride+dst_channel] = src[i*src_stride];
-#else
-         short_dst[i*dst_stride+dst_channel] = FLOAT2INT16(src[i*src_stride]);
-#endif
+         short_dst[i*dst_stride+dst_channel] = RES2INT16(src[i*src_stride]);
   }
   else
   {
--- a/src/opus_private.h
+++ b/src/opus_private.h
@ -138,7 +138,7 @@ typedef void (*opus_copy_channel_out_func)(
  void *dst,
  int dst_stride,
  int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
  int src_stride,
  int frame_size,
  void *user_data
@ -186,7 +186,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
      int analysis_channels, downmix_func downmix, int float_api);

 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
-      opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
+      opus_res *pcm, int frame_size, int decode_fec, int self_delimited,
      opus_int32 *packet_offset, int soft_clip, const OpusDRED *dred, opus_int32 dred_offset);

 /* Make sure everything is properly aligned. */
--- a/src/opus_projection_decoder.c
+++ b/src/opus_projection_decoder.c
@ -49,7 +49,7 @@ static void opus_projection_copy_channel_out_float(
  void *dst,
  int dst_stride,
  int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
  int src_stride,
  int frame_size,
  void *user_data)
@ -72,7 +72,7 @@ static void opus_projection_copy_channel_out_short(
  void *dst,
  int dst_stride,
  int dst_channel,
-  const opus_val16 *src,
+  const opus_res *src,
  int src_stride,
  int frame_size,
  void *user_data)
--- a/tests/test_opus_projection.c
+++ b/tests/test_opus_projection.c
@ -94,13 +94,13 @@ void test_simple_matrix(void)

  int i, ret;
  opus_int32 simple_matrix_size;
-  opus_val16 *input_val16;
+  opus_res *input_pcm;
  opus_val16 *output_val16;
  opus_int16 *output_int16;
  MappingMatrix *simple_matrix;

  /* Allocate input/output buffers. */
-  input_val16 = (opus_val16 *)opus_alloc(sizeof(opus_val16) * SIMPLE_MATRIX_INPUT_SIZE);
+  input_pcm = (opus_res *)opus_alloc(sizeof(opus_res) * SIMPLE_MATRIX_INPUT_SIZE);
  output_int16 = (opus_int16 *)opus_alloc(sizeof(opus_int16) * SIMPLE_MATRIX_OUTPUT_SIZE);
  output_val16 = (opus_val16 *)opus_alloc(sizeof(opus_val16) * SIMPLE_MATRIX_OUTPUT_SIZE);

@ -118,11 +118,7 @@ void test_simple_matrix(void)
  /* Copy inputs. */
  for (i = 0; i < SIMPLE_MATRIX_INPUT_SIZE; i++)
  {
-#ifdef FIXED_POINT
-    input_val16[i] = input_int16[i];
-#else
-    input_val16[i] = (1/32768.f)*input_int16[i];
-#endif
+    input_pcm[i] = INT16TORES(input_int16[i]);
  }

  /* _in_short */
@ -144,7 +140,7 @@ void test_simple_matrix(void)
  for (i = 0; i < simple_matrix->cols; i++)
  {
    mapping_matrix_multiply_channel_out_short(simple_matrix,
-      &input_val16[i], i, simple_matrix->cols, output_int16,
+      &input_pcm[i], i, simple_matrix->cols, output_int16,
      simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
  }
  ret = assert_is_equal_short(output_int16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
@ -158,7 +154,7 @@ void test_simple_matrix(void)
  for (i = 0; i < simple_matrix->rows; i++)
  {
    mapping_matrix_multiply_channel_in_float(simple_matrix,
-      input_val16, simple_matrix->cols, &output_val16[i], i,
+      input_pcm, simple_matrix->cols, &output_val16[i], i,
      simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
  }
  ret = assert_is_equal(output_val16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
@ -171,7 +167,7 @@ void test_simple_matrix(void)
  for (i = 0; i < simple_matrix->cols; i++)
  {
    mapping_matrix_multiply_channel_out_float(simple_matrix,
-      &input_val16[i], i, simple_matrix->cols, output_val16,
+      &input_pcm[i], i, simple_matrix->cols, output_val16,
      simple_matrix->rows, SIMPLE_MATRIX_FRAME_SIZE);
  }
  ret = assert_is_equal(output_val16, expected_output_int16, SIMPLE_MATRIX_OUTPUT_SIZE, ERROR_TOLERANCE);
@ -179,7 +175,7 @@ void test_simple_matrix(void)
    test_failed();
 #endif

-  opus_free(input_val16);
+  opus_free(input_pcm);
  opus_free(output_int16);
  opus_free(output_val16);
  opus_free(simple_matrix);