Merge branch 'exp_analysis'

Conflicts: celt/celt_encoder.c
2025-05-30 15:17:42 +00:00 · 2013-03-08 12:29:53 -05:00 · 2013-03-08 12:29:53 -05:00 · f96fc8cc83
commit f96fc8cc83
parent fa43c770e0 73142b100a
10 changed files with 936 additions and 196 deletions
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@ -1329,7 +1329,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,

      prefilter_tapset = st->tapset_decision;
      pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
-      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
+      if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
            && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
         pitch_change = 1;
      if (pf_on==0)
@ -1353,15 +1353,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,

   isTransient = 0;
   shortBlocks = 0;
+   if (st->complexity >= 1)
+   {
+      isTransient = transient_analysis(in, N+st->overlap, CC,
+            &tf_estimate, &tf_chan);
+   }
   if (LM>0 && ec_tell(enc)+3<=total_bits)
   {
-      if (st->complexity >= 1)
-      {
-         isTransient = transient_analysis(in, N+st->overlap, CC,
-                  &tf_estimate, &tf_chan);
-         if (isTransient)
-            shortBlocks = M;
-      }
+      if (isTransient)
+         shortBlocks = M;
+   } else {
+      isTransient = 0;
   }

   ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
--- a/include/opus_defines.h
+++ b/include/opus_defines.h
@ -148,8 +148,9 @@ extern "C" {
 #define OPUS_GET_GAIN_REQUEST                4045 /* Should have been 4035 */
 #define OPUS_SET_LSB_DEPTH_REQUEST           4036
 #define OPUS_GET_LSB_DEPTH_REQUEST           4037
-
 #define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039
+#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
+#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041

 /* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */

@ -185,6 +186,15 @@ extern "C" {
 #define OPUS_BANDWIDTH_SUPERWIDEBAND         1104 /**<12 kHz bandpass @hideinitializer*/
 #define OPUS_BANDWIDTH_FULLBAND              1105 /**<20 kHz bandpass @hideinitializer*/

+#define OPUS_FRAMESIZE_ARG                   5000 /**< Select frame size from the argument (default) */
+#define OPUS_FRAMESIZE_2_5_MS                5001 /**< Use 2.5 ms frames */
+#define OPUS_FRAMESIZE_5_MS                  5002 /**< Use 5 ms frames */
+#define OPUS_FRAMESIZE_10_MS                 5003 /**< Use 10 ms frames */
+#define OPUS_FRAMESIZE_20_MS                 5004 /**< Use 20 ms frames */
+#define OPUS_FRAMESIZE_40_MS                 5005 /**< Use 40 ms frames */
+#define OPUS_FRAMESIZE_60_MS                 5006 /**< Use 60 ms frames */
+#define OPUS_FRAMESIZE_VARIABLE              5010 /**< Optimize the frame size dynamically */
+
 /**@}*/


@ -525,6 +535,32 @@ extern "C" {
  * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
  * @hideinitializer */
 #define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Configures the encoder's use of variable duration frames.
+  * When enabled, the encoder is free to use a shorter frame size than the one
+  * requested in the opus_encode*() call. It is then the user's responsibility
+  * to verify how much audio was encoded by checking the ToC byte of the encoded
+  * packet. The part of the audio that was not encoded needs to be resent to the
+  * encoder for the next call. Do not use this option unless you <b>really</b>
+  * know what you are doing.
+  * @see OPUS_GET_EXPERT_FRAME_DURATION
+  * @param[in] x <tt>opus_int32</tt>: Allowed values:
+  * <dl>
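+  * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
+  * <dt>OPUS_FRAMESIZE_2_5_MS ... OPUS_FRAMESIZE_60_MS</dt><dd>Use frames of the named fixed duration.</dd>
+  * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>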
+  * </dl>
+  * @hideinitializer */
+#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured use of variable duration frames.
+  * @see OPUS_SET_EXPERT_FRAME_DURATION
+  * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
+  * <dl>
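+  * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Frame size is selected from the argument (default).</dd>
+  * <dt>OPUS_FRAMESIZE_2_5_MS ... OPUS_FRAMESIZE_60_MS</dt><dd>Frames of the named fixed duration are used.</dd>
+  * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>The frame size is optimized dynamically.</dd>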
+  * </dl>
+  * @hideinitializer */
+#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
+
 /**@}*/

 /** @defgroup opus_genericctls Generic CTLs
--- a/src/analysis.c
+++ b/src/analysis.c
@ -139,10 +139,56 @@ static inline float fast_atan2f(float y, float x) {
   }
 }

-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth)
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
+{
+   int pos;
+   int curr_lookahead;
+   float psum;
+   int i;
+
+   pos = tonal->read_pos;
+   curr_lookahead = tonal->write_pos-tonal->read_pos;
+   if (curr_lookahead<0)
+      curr_lookahead += DETECT_SIZE;
+
+   if (len > 480 && pos != tonal->write_pos)
+   {
+      pos++;
+      if (pos==DETECT_SIZE)
+         pos=0;
+   }
+   if (pos == tonal->write_pos)
+      pos--;
+   if (pos<0)
+      pos = DETECT_SIZE-1;
+   OPUS_COPY(info_out, &tonal->info[pos], 1);
+   tonal->read_subframe += len/120;
+   while (tonal->read_subframe>=4)
+   {
+      tonal->read_subframe -= 4;
+      tonal->read_pos++;
+   }
+   if (tonal->read_pos>=DETECT_SIZE)
+      tonal->read_pos-=DETECT_SIZE;
+
+   /* Compensate for the delay in the features themselves.
+      FIXME: Need a better estimate than the 10 I just made up */
+   curr_lookahead = IMAX(curr_lookahead-10, 0);
+
+   psum=0;
+   for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
+      psum += tonal->pmusic[i];
+   for (;i<DETECT_SIZE;i++)
+      psum += tonal->pspeech[i];
+   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
+   /*printf("%f %f\n", psum, info_out->music_prob);*/
+
+   info_out->music_prob = psum;
+}
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
 {
    int i, b;
-    const CELTMode *mode;
    const kiss_fft_state *kfft;
    kiss_fft_cpx in[480], out[480];
    int N = 480, N2=240;
@ -163,14 +209,15 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
    float slope=0;
    float frame_stationarity;
    float relativeE;
-    float frame_prob;
+    float frame_probs[2];
    float alpha, alphaE, alphaE2;
    float frame_loudness;
    float bandwidth_mask;
    int bandwidth=0;
    float maxE = 0;
    float noise_floor;
-    celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
+    int remaining;
+    AnalysisInfo *info;

    tonal->last_transition++;
    alpha = 1.f/IMIN(20, 1+tonal->count);
@ -179,27 +226,32 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc

    if (tonal->count<4)
       tonal->music_prob = .5;
-    kfft = mode->mdct.kfft[0];
-    if (C==1)
+    kfft = celt_mode->mdct.kfft[0];
+    if (tonal->count==0)
+       tonal->mem_fill = 240;
+    downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
+    if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
    {
-       for (i=0;i<N2;i++)
-       {
-          float w = analysis_window[i];
-          in[i].r = MULT16_16(w, x[i]);
-          in[i].i = MULT16_16(w, x[N-N2+i]);
-          in[N-i-1].r = MULT16_16(w, x[N-i-1]);
-          in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);
-       }
-    } else {
-       for (i=0;i<N2;i++)
-       {
-          float w = analysis_window[i];
-          in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);
-          in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);
-          in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
-          in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);
-       }
+       tonal->mem_fill += len;
+       /* Don't have enough to update the analysis */
+       return;
    }
+    info = &tonal->info[tonal->write_pos++];
+    if (tonal->write_pos>=DETECT_SIZE)
+       tonal->write_pos-=DETECT_SIZE;
+
+    for (i=0;i<N2;i++)
+    {
+       float w = analysis_window[i];
+       in[i].r = MULT16_16(w, tonal->inmem[i]);
+       in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
+       in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
+       in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
+    }
+    OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
+    remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
+    downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
+    tonal->mem_fill = 240 + remaining;
    opus_fft(kfft, in, out);

    for (i=1;i<N2;i++)
@ -417,27 +469,91 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
    features[24] = tonal->lowECount;

 #ifndef FIXED_POINT
-    mlp_process(&net, features, &frame_prob);
-    frame_prob = .5f*(frame_prob+1);
+    mlp_process(&net, features, frame_probs);
+    frame_probs[0] = .5f*(frame_probs[0]+1);
    /* Curve fitting between the MLP probability and the actual probability */
-    frame_prob = .01f + 1.21f*frame_prob*frame_prob - .23f*(float)pow(frame_prob, 10);
+    frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
+    frame_probs[1] = .5*frame_probs[1]+.5;
+    frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5;

-    /*printf("%f\n", frame_prob);*/
+    /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
    {
       float tau, beta;
       float p0, p1;
-       float max_certainty;
       /* One transition every 3 minutes */
-       tau = .00005f;
-       beta = .1f;
-       max_certainty = .01f+1.f/(20.f+.5f*tonal->last_transition);
+       tau = .00005f*frame_probs[1];
+       beta = .05f;
+       if (1) {
+          /* Adapt beta based on how "unexpected" the new prob is */
+          float p, q;
+          p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
+          q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
+          beta = .01+.05*ABS16(p-q)/(p*(1-q)+q*(1-p));
+       }
       p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
       p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
-       p0 *= (float)pow(1-frame_prob, beta);
-       p1 *= (float)pow(frame_prob, beta);
-       tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
+       p0 *= (float)pow(1-frame_probs[0], beta);
+       p1 *= (float)pow(frame_probs[0], beta);
+       tonal->music_prob = p1/(p0+p1);
       info->music_prob = tonal->music_prob;
-       /*printf("%f %f\n", frame_prob, info->music_prob);*/
+
+       float psum=1e-20;
+       float speech0 = (float)pow(1-frame_probs[0], beta);
+       float music0  = (float)pow(frame_probs[0], beta);
+       if (tonal->count==1)
+       {
+          tonal->pspeech[0]=.5;
+          tonal->pmusic [0]=.5;
+       }
+       float s0, m0;
+       s0 = tonal->pspeech[0] + tonal->pspeech[1];
+       m0 = tonal->pmusic [0] + tonal->pmusic [1];
+       tonal->pspeech[0] = s0*(1-tau)*speech0;
+       tonal->pmusic [0] = m0*(1-tau)*music0;
+       for (i=1;i<DETECT_SIZE-1;i++)
+       {
+          tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
+          tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
+       }
+       tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
+       tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
+
+       for (i=0;i<DETECT_SIZE;i++)
+          psum += tonal->pspeech[i] + tonal->pmusic[i];
+       psum = 1.f/psum;
+       for (i=0;i<DETECT_SIZE;i++)
+       {
+          tonal->pspeech[i] *= psum;
+          tonal->pmusic [i] *= psum;
+       }
+       psum = tonal->pmusic[0];
+       for (i=1;i<DETECT_SIZE;i++)
+          psum += tonal->pspeech[i];
+
+       /* Estimate our confidence in the speech/music decisions */
+       if (frame_probs[1]>.75)
+       {
+          if (tonal->music_prob>.9)
+          {
+             float adapt;
+             adapt = 1.f/(++tonal->music_confidence_count);
+             tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
+             tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
+          }
+          if (tonal->music_prob<.1)
+          {
+             float adapt;
+             adapt = 1.f/(++tonal->speech_confidence_count);
+             tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
+             tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
+          }
+       } else {
+          if (tonal->music_confidence_count==0)
+             tonal->music_confidence = .9;
+          if (tonal->speech_confidence_count==0)
+             tonal->speech_confidence = .1;
+       }
+       psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psum));
    }
    if (tonal->last_music != (tonal->music_prob>.5f))
       tonal->last_transition=0;
@ -465,4 +581,48 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
    /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
    info->noisiness = frame_noisiness;
    info->valid = 1;
+    if (info_out!=NULL)
+       OPUS_COPY(info_out, info, 1);
+}
+
+int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
+                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+{
+   int offset;
+   int pcm_len;
+
+   /* Avoid overflow/wrap-around of the analysis buffer */
+   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
+
+   pcm_len = frame_size - analysis->analysis_offset;
+   offset = 0;
+   do {
+      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
+      offset += 480;
+      pcm_len -= 480;
+   } while (pcm_len>0);
+   analysis->analysis_offset = frame_size;
+
+   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   {
+      int LM = 3;
+      LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps,
+            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
+      while ((Fs/400<<LM)>frame_size)
+         LM--;
+      frame_size = (Fs/400<<LM);
+   } else {
+      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+   }
+   if (frame_size<0)
+      return -1;
+   analysis->analysis_offset -= frame_size;
+
+   /* Only perform analysis up to 20-ms frames. Longer ones will be split if
+      they're in CELT-only mode. */
+   analysis_info->valid = 0;
+   tonality_get_info(analysis, analysis_info, frame_size);
+
+   return frame_size;
 }
--- a/src/analysis.h
+++ b/src/analysis.h
@ -28,18 +28,27 @@
 #ifndef ANALYSIS_H
 #define ANALYSIS_H

+#include "celt.h"
+#include "opus_private.h"
+
 #define NB_FRAMES 8
 #define NB_TBANDS 18
 #define NB_TOT_BANDS 21
+#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
+
+#define DETECT_SIZE 200

 typedef struct {
   float angle[240];
   float d_angle[240];
   float d2_angle[240];
+   float inmem[ANALYSIS_BUF_SIZE];
+   int   mem_fill;                      /* number of usable samples in the buffer */
   float prev_band_tonality[NB_TBANDS];
   float prev_tonality;
   float E[NB_FRAMES][NB_TBANDS];
-   float lowE[NB_TBANDS], highE[NB_TBANDS];
+   float lowE[NB_TBANDS];
+   float highE[NB_TBANDS];
   float meanE[NB_TOT_BANDS];
   float mem[32];
   float cmean[8];
@ -52,9 +61,27 @@ typedef struct {
   int last_transition;
   int count;
   int opus_bandwidth;
+   opus_val32   subframe_mem[3];
+   int analysis_offset;
+   float pspeech[DETECT_SIZE];
+   float pmusic[DETECT_SIZE];
+   float speech_confidence;
+   float music_confidence;
+   int speech_confidence_count;
+   int music_confidence_count;
+   int write_pos;
+   int read_pos;
+   int read_subframe;
+   AnalysisInfo info[DETECT_SIZE];
 } TonalityAnalysisState;

 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
-     CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth);
+     const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);
+
+void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
+
+int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
+                        const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
+                        int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);

 #endif
--- a/src/mlp_data.c
+++ b/src/mlp_data.c
@ -3,74 +3,103 @@

 #include "mlp.h"

-/* RMS error was 0.179835, seed was 1322103961 */
+/* RMS error was 0.138320, seed was 1361535663 */

-static const float weights[271] = {
+static const float weights[422] = {

 /* hidden layer */
-1.55597f, -0.0739792f, -0.0646761f, -0.099531f, -0.0794943f,
-0.0180174f, -0.0391354f, 0.0508224f, -0.0160169f, -0.0773263f,
-0.0300002f, -0.0865361f, 0.124477f, -0.28648f, -0.0860702f,
-0.518949f, -0.0873341f, -0.235393f, -0.907833f, -0.383573f,
-0.535388f, -0.57944f, 0.98116f, 0.8482f, 1.12426f,
-3.23721f, -0.647072f, -0.0265139f, 0.0711052f, -0.00125666f,
-0.0396181f, -0.44282f, -0.510495f, -0.201865f, 0.0134336f,
-0.167205f, -0.155406f, 0.00041678f, -0.00468705f, -0.0233224f,
-0.264279f, -0.301375f, 0.00234895f, 0.0144741f, -0.137535f,
-0.200323f, 0.0192027f, 3.19818f, 2.03495f, 0.705517f,
-4.6025f, -0.11485f, -0.792716f, 0.150714f, 0.10608f,
-0.240633f, 0.0690698f, 0.0695297f, 0.124819f, 0.0501433f,
-0.0460952f, 0.147639f, 0.10327f, 0.158007f, 0.113714f,
-0.0276191f, 0.0680749f, -0.130012f, 0.0796126f, 0.133067f,
-0.51495f, 0.747578f, -0.128742f, 5.98112f, -1.16698f,
-0.276492f, -1.73549f, -3.90234f, 2.01489f, -0.040118f,
-0.113002f, -0.146751f, -0.113569f, 0.0534873f, 0.0989832f,
-0.0872875f, 0.049266f, 0.0367557f, -0.00889148f, -0.0648461f,
-0.00190352f, 0.0143773f, 0.0259364f, -0.0592133f, -0.0672924f,
-0.1399f, -0.0987886f, -0.347402f, 0.101326f, -0.0680876f,
-0.469186f, 0.246922f, 10.4017f, 3.44846f, -0.662725f,
-0.0328208f, -0.0561274f, -0.0167744f, 0.00044282f, -0.0457645f,
-0.0408314f, -0.013113f, -0.0373873f, -0.0474122f, -0.0273745f,
-0.0308505f, 0.000582959f, -0.0421135f, 0.464859f, 0.196842f,
-0.320538f, 0.0435528f, -0.200168f, 0.266475f, -0.0853727f,
-1.20397f, 0.711542f, -1.04397f, -1.47759f, 1.26768f,
-0.446958f, 0.266477f, -0.30802f, 0.28431f, -0.118541f,
-0.00836345f, 0.0689026f, -0.0137996f, -0.0395417f, 0.26982f,
-0.206255f, 0.16066f, 0.114757f, 0.359587f, -0.106503f,
-0.0948534f, 0.175358f, -0.122966f, -0.0056675f, 0.483848f,
-0.134916f, -0.427567f, -0.140172f, -1.0866f, -2.73921f,
-0.549843f, 0.17685f, 0.0010675f, -0.00137386f, 0.0884424f,
-0.0698736f, -0.00174136f, 0.0718775f, -0.0396849f, 0.0448056f,
-0.0577853f, -0.0372353f, 0.134599f, 0.0260656f, 0.140322f,
-0.22704f, -0.020568f, -0.0142424f, -0.21723f, -0.997704f,
-0.884573f, -0.163495f, 2.33617f, 0.224142f, 0.19635f,
-0.957387f, 0.144678f, 1.47035f, -0.00700498f, -0.0472309f,
-0.0137848f, -0.0189145f, 0.00856479f, 0.0316965f, 0.00613373f,
-0.00209807f, 0.00270964f, -0.0490206f, 0.0105712f, -0.0465045f,
-0.0381532f, -0.0985268f, -0.108297f, 0.0146409f, -0.0040718f,
-0.0698572f, -0.380568f, -0.230479f, 3.98917f, 0.457652f,
-1.02355f, -7.4435f, -0.475314f, 1.61743f, 0.0254017f,
-0.00791293f, 0.047217f, 0.0220995f, -0.0304311f, 0.0052168f,
-0.0404054f, -0.0230293f, 0.00169229f, -0.0138178f, 0.0043137f,
-0.0598088f, -0.133601f, 0.0555138f, -0.177358f, -0.159856f,
-0.137281f, 0.108051f, -0.305973f, 0.393775f, 0.0747287f,
-0.783993f, -0.875086f, 1.06862f, 0.340519f, -0.352681f,
-0.0830912f, -0.100017f, 0.0729085f, -0.00829403f, 0.027489f,
-0.0779597f, 0.082286f, -0.164181f, -0.41519f, 0.00282335f,
-0.29573f, 0.125571f, 0.726935f, 0.392137f, 0.491348f,
-0.0723196f, -0.0259758f, -0.0636332f, -0.452384f, -0.000225974f,
-2.34001f, 2.45211f, -0.544628f, 5.62944f, -3.44507f,
+-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f,
+-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f,
+-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f,
+0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f,
+0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f,
+24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f,
+-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f,
+-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f,
+-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f,
+1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f,
+15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f,
+0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f,
+-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f,
+0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f,
+0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f,
+-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f,
+-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f,
+-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f,
+0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f,
+-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f,
+2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f,
+0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f,
+-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f,
+0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f,
+0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f,
+-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f,
+5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f,
+-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f,
+-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f,
+-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f,
+1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f,
+-7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f,
+-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f,
+0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f,
+0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f,
+-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f,
+10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f,
+-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f,
+-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f,
+-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f,
+0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f,
+-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f,
+0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f,
+0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f,
+-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f,
+0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f,
+-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f,
+-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f,
+-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f,
+-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f,
+-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f,
+5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f,
+1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f,
+0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f,
+-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f,
+0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f,
+-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f,
+-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f,
+0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f,
+-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f,
+-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f,
+0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f,
+-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f,
+0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f,
+-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f,
+-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f,
+0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f,
+-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f,
+0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f,
+-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f,
+0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f,
+-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f,
+4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f,
+0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f,
+-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f,
+0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f,
+0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f,
+3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f,

 /* output layer */
-3.13835f, 0.994751f, 0.444901f, 1.59518f, 1.23665f,
-3.37012f, -1.34606f, 1.99131f, 1.33476f, 1.3885f,
-1.12559f, };
+-0.381439, 0.12115, -0.906927, 2.93878, 1.6388,
+0.882811, 0.874344, 1.21726, -0.874545, 0.321706,
+0.785055, 0.946558, -0.575066, -3.46553, 0.884905,
+0.0924047, -9.90712, 0.391338, 0.160103, -2.04954,
+4.1455, 0.0684029, -0.144761, -0.285282, 0.379244,
+-1.1584, -0.0277241, -9.85, -4.82386, 3.71333,
+3.87308, 3.52558, };

-static const int topo[3] = {25, 10, 1};
+static const int topo[3] = {25, 15, 2};

 const MLP net = {
-	3,
-	topo,
-	weights
+    3,
+    topo,
+    weights
 };
-
--- a/src/mlp_train.c
+++ b/src/mlp_train.c
@ -106,6 +106,7 @@ MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int
 }

 #define MAX_NEURONS 100
+#define MAX_OUT 10

 double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate)
 {
@ -120,7 +121,8 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
 	double netOut[MAX_NEURONS];
 	double error[MAX_NEURONS];

-        *error_rate = 0;
+	for (i=0;i<outDim;i++)
+	   error_rate[i] = 0;
 	topo = net->topo;
 	inDim = net->topo[0];
 	hiddenDim = net->topo[1];
@ -153,7 +155,7 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
 			netOut[i] = tansig_approx(sum);
 			error[i] = out[i] - netOut[i];
 			rms += error[i]*error[i];
-			*error_rate += fabs(error[i])>1;
+			error_rate[i] += fabs(error[i])>1;
 			/*error[i] = error[i]/(1+fabs(error[i]));*/
 		}
 		/* Back-propagate error */
@ -194,7 +196,7 @@ struct GradientArg {
 	double *W0_grad;
 	double *W1_grad;
 	double rms;
-	double error_rate;
+	double error_rate[MAX_OUT];
 };

 void *gradient_thread_process(void *_arg)
@ -213,7 +215,7 @@ void *gradient_thread_process(void *_arg)
 		sem_wait(&sem_begin[arg->id]);
 		if (arg->done)
 			break;
-		arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, &arg->error_rate);
+		arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, arg->error_rate);
 		sem_post(&sem_end[arg->id]);
 	}
 	fprintf(stderr, "done\n");
@ -295,7 +297,7 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam
 	for (e=0;e<nbEpoch;e++)
 	{
 		double rms=0;
-                double error_rate = 0;
+		double error_rate[2] = {0,0};
 		for (i=0;i<NB_THREADS;i++)
 		{
 			sem_post(&sem_begin[i]);
@ -306,7 +308,8 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam
 		{
 			sem_wait(&sem_end[i]);
 			rms += args[i].rms;
-			error_rate += args[i].error_rate;
+			error_rate[0] += args[i].error_rate[0];
+            error_rate[1] += args[i].error_rate[1];
 			for (j=0;j<W0_size;j++)
 				W0_grad[j] += args[i].W0_grad[j];
 			for (j=0;j<W1_size;j++)
@ -315,8 +318,9 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam

 		float mean_rate = 0, min_rate = 1e10;
 		rms = (rms/(outDim*nbSamples));
-		error_rate = (error_rate/(outDim*nbSamples));
-		fprintf (stderr, "%f (%f %f) ", error_rate, rms, best_rms);
+		error_rate[0] = (error_rate[0]/(nbSamples));
+        error_rate[1] = (error_rate[1]/(nbSamples));
+		fprintf (stderr, "%f %f (%f %f) ", error_rate[0], error_rate[1], rms, best_rms);
 		if (rms < best_rms)
 		{
 			best_rms = rms;
@ -445,6 +449,7 @@ int main(int argc, char **argv)
 	outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs));
 	
 	seed = time(NULL);
+    /*seed = 1361480659;*/
 	fprintf (stderr, "Seed is %u\n", seed);
 	srand(seed);
 	build_tansig_table();
--- a/src/opus_demo.c
+++ b/src/opus_demo.c
@ -53,6 +53,7 @@ void print_usage( char* argv[] )
    fprintf(stderr, "-d                   : only runs the decoder (reads the bit-stream as input)\n" );
    fprintf(stderr, "-cbr                 : enable constant bitrate; default: variable bitrate\n" );
    fprintf(stderr, "-cvbr                : enable constrained variable bitrate; default: unconstrained\n" );
+    fprintf(stderr, "-variable-duration   : enable frames of variable duration (experts only); default: disabled\n" );
    fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" );
    fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" );
    fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1024\n" );
@ -221,6 +222,8 @@ int main(int argc, char *argv[])
    short *in, *out;
    int application=OPUS_APPLICATION_AUDIO;
    double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg;
+    double tot_samples=0;
+    opus_uint64 tot_in, tot_out;
    int bandwidth=-1;
    const char *bandwidth_string;
    int lost = 0, lost_prev = 1;
@ -239,6 +242,10 @@ int main(int argc, char *argv[])
    int curr_mode=0;
    int curr_mode_count=0;
    int mode_switch_time = 48000;
+    int nb_encoded;
+    int remaining=0;
+    int variable_duration=OPUS_FRAMESIZE_ARG;
+    int delayed_decision=0;

    if (argc < 5 )
    {
@ -246,6 +253,7 @@ int main(int argc, char *argv[])
       return EXIT_FAILURE;
    }

+    tot_in=tot_out=0;
    fprintf(stderr, "%s\n", opus_get_version_string());

    args = 1;
@ -306,7 +314,7 @@ int main(int argc, char *argv[])
    forcechannels = OPUS_AUTO;
    use_dtx = 0;
    packet_loss_perc = 0;
-    max_frame_size = 960*6;
+    max_frame_size = 2*48000;
    curr_read=0;

    while( args < argc - 2 ) {
@ -374,6 +382,14 @@ int main(int argc, char *argv[])
            check_encoder_option(decode_only, "-cvbr");
            cvbr = 1;
            args++;
+        } else if( strcmp( argv[ args ], "-variable-duration" ) == 0 ) {
+            check_encoder_option(decode_only, "-variable-duration");
+            variable_duration = OPUS_FRAMESIZE_VARIABLE;
+            args++;
+        } else if( strcmp( argv[ args ], "-delayed-decision" ) == 0 ) {
+            check_encoder_option(decode_only, "-delayed-decision");
+            delayed_decision = 1;
+            args++;
        } else if( strcmp( argv[ args ], "-dtx") == 0 ) {
            check_encoder_option(decode_only, "-dtx");
            use_dtx = 1;
@ -499,6 +515,7 @@ int main(int argc, char *argv[])

       opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&skip));
       opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(16));
+       opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
    }
    if (!encode_only)
    {
@ -554,6 +571,26 @@ int main(int argc, char *argv[])
    if ( use_inbandfec ) {
        data[1] = (unsigned char*)calloc(max_payload_bytes,sizeof(char));
    }
+    if(delayed_decision)
+    {
+       if (variable_duration!=OPUS_FRAMESIZE_VARIABLE)
+       {
+          if (frame_size==sampling_rate/400)
+             variable_duration = OPUS_FRAMESIZE_2_5_MS;
+          else if (frame_size==sampling_rate/200)
+             variable_duration = OPUS_FRAMESIZE_5_MS;
+          else if (frame_size==sampling_rate/100)
+             variable_duration = OPUS_FRAMESIZE_10_MS;
+          else if (frame_size==sampling_rate/50)
+             variable_duration = OPUS_FRAMESIZE_20_MS;
+          else if (frame_size==sampling_rate/25)
+             variable_duration = OPUS_FRAMESIZE_40_MS;
+          else
+             variable_duration = OPUS_FRAMESIZE_60_MS;
+          opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
+       }
+       frame_size = 2*48000;
+    }
    while (!stop)
    {
        if (delayed_celt)
@ -617,22 +654,28 @@ int main(int argc, char *argv[])
                opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
                frame_size = mode_list[curr_mode][2];
            }
-            err = fread(fbytes, sizeof(short)*channels, frame_size, fin);
+            err = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
            curr_read = err;
+            tot_in += curr_read;
            for(i=0;i<curr_read*channels;i++)
            {
                opus_int32 s;
                s=fbytes[2*i+1]<<8|fbytes[2*i];
                s=((s&0xFFFF)^0x8000)-0x8000;
-                in[i]=s;
+                in[i+remaining*channels]=s;
            }
-            if (curr_read < frame_size)
+            if (curr_read+remaining < frame_size)
            {
-                for (i=curr_read*channels;i<frame_size*channels;i++)
+                for (i=(curr_read+remaining)*channels;i<frame_size*channels;i++)
                   in[i] = 0;
-                stop = 1;
+                if (encode_only || decode_only)
+                   stop = 1;
            }
            len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
+            nb_encoded = opus_packet_get_samples_per_frame(data[toggle], sampling_rate)*opus_packet_get_nb_frames(data[toggle], len[toggle]);
+            remaining = frame_size-nb_encoded;
+            for(i=0;i<remaining*channels;i++)
+               in[i] = in[nb_encoded*channels+i];
            if (sweep_bps!=0)
            {
               bitrate_bps += sweep_bps;
@ -681,6 +724,7 @@ int main(int argc, char *argv[])
               fprintf(stderr, "Error writing.\n");
               return EXIT_FAILURE;
            }
+            tot_samples += nb_encoded;
        } else {
            int output_samples;
            lost = len[toggle]==0 || (packet_loss_perc>0 && rand()%100 < packet_loss_perc);
@ -703,6 +747,11 @@ int main(int argc, char *argv[])
                }
                if (output_samples>0)
                {
+                    if (!decode_only && tot_out + output_samples > tot_in)
+                    {
+                       stop=1;
+                       output_samples  = tot_in-tot_out;
+                    }
                    if (output_samples>skip) {
                       int i;
                       for(i=0;i<(output_samples-skip)*channels;i++)
@ -716,6 +765,7 @@ int main(int argc, char *argv[])
                          fprintf(stderr, "Error writing.\n");
                          return EXIT_FAILURE;
                       }
+                       tot_out += output_samples-skip;
                    }
                    if (output_samples<skip) skip -= output_samples;
                    else skip = 0;
@ -723,6 +773,7 @@ int main(int argc, char *argv[])
                   fprintf(stderr, "error decoding frame: %s\n",
                                   opus_strerror(output_samples));
                }
+                tot_samples += output_samples;
            }
        }

@ -767,7 +818,7 @@ int main(int argc, char *argv[])
        toggle = (toggle + use_inbandfec) & 1;
    }
    fprintf (stderr, "average bitrate:             %7.3f kb/s\n",
-                     1e-3*bits*sampling_rate/(frame_size*(double)count));
+                     1e-3*bits*sampling_rate/tot_samples);
    fprintf (stderr, "maximum bitrate:             %7.3f kb/s\n",
                     1e-3*bits_max*sampling_rate/frame_size);
    if (!decode_only)
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@ -67,6 +67,7 @@ struct OpusEncoder {
    opus_int32   Fs;
    int          use_vbr;
    int          vbr_constraint;
+    int          variable_duration;
    opus_int32   bitrate_bps;
    opus_int32   user_bitrate_bps;
    int          lsb_depth;
@ -89,7 +90,8 @@ struct OpusEncoder {
    opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
 #ifndef FIXED_POINT
    TonalityAnalysisState analysis;
-    int                   detected_bandwidth;
+    int          detected_bandwidth;
+    int          analysis_offset;
 #endif
    opus_uint32  rangeFinal;
 };
@ -213,6 +215,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
    st->voice_ratio = -1;
    st->encoder_buffer = st->Fs/100;
    st->lsb_depth = 24;
+    st->variable_duration = OPUS_FRAMESIZE_ARG;

    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead 
       + 1.5 ms for SILK resamplers and stereo prediction) */
@ -535,8 +538,258 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m
    return st->user_bitrate_bps;
 }

+#ifndef FIXED_POINT
+/* Don't use more than 60 ms for the frame size analysis */
+#define MAX_DYNAMIC_FRAMESIZE 24
+/* Estimates the bitrate boost (a value in 0..1) from the sub-frame energies E and inverse energies E_1 */
+static float transient_boost(const float *E, const float *E_1, int LM, int maxM)
+{
+   int k, count;
+   float metric;
+   float energy_sum;
+   float inv_energy_sum;
+
+   /* Look at (1<<LM)+1 entries, but never more than the maxM that are available */
+   count = IMIN(maxM, (1<<LM)+1);
+   energy_sum = 0;
+   inv_energy_sum = 0;
+   for (k=0;k<count;k++)
+   {
+      energy_sum += E[k];
+      inv_energy_sum += E_1[k];
+   }
+   /* Mean of E times mean of E_1; the mapping below yields 0 while metric<=2 */
+   metric = energy_sum*inv_energy_sum/(count*count);
+   return MIN16(1,sqrt(MAX16(0,.05*(metric-2))));
+}
+
+/* Viterbi decoding trying to find the best frame size combination using look-ahead
+   Returns the size index of the first frame on the best path (states[0][]: 0..3).
+   State numbering:
+    0: unused
+    1:  2.5 ms
+    2:  5 ms (#1)
+    3:  5 ms (#2)
+    4: 10 ms (#1)
+    5: 10 ms (#2)
+    6: 10 ms (#3)
+    7: 10 ms (#4)
+    8: 20 ms (#1)
+    9: 20 ms (#2)
+   10: 20 ms (#3)
+   11: 20 ms (#4)
+   12: 20 ms (#5)
+   13: 20 ms (#6)
+   14: 20 ms (#7)
+   15: 20 ms (#8)
+*/
+static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate)
+{
+   int i;
+   float cost[MAX_DYNAMIC_FRAMESIZE][16];
+   int states[MAX_DYNAMIC_FRAMESIZE][16];
+   float best_cost;
+   int best_state;
+
+   /* Makes variable framesize less aggressive at lower bitrates (no theoretical
+      justification, it just seems to help). NOTE(review): 720/rate is an integer
+      division: it is 0 for rate>720 and divides by zero when rate==0 -- confirm */
+   frame_cost *= 720/rate;
+   for (i=0;i<16;i++)
+   {
+      /* Impossible state until proven otherwise; the valid start states are set below */
+      states[0][i] = -1;
+      cost[0][i] = 1e10;
+   }
+   for (i=0;i<4;i++)
+   {
+      cost[0][1<<i] = frame_cost + rate*(1<<i)*transient_boost(E, E_1, i, N+1);
+      states[0][1<<i] = i;
+   }
+   for (i=1;i<N;i++)
+   {
+      int j;
+
+      /* Follow continuations: state j-1 advances to j at no cost (2, 4 and 8 are overwritten below) */
+      for (j=2;j<16;j++)
+      {
+         cost[i][j] = cost[i-1][j-1];
+         states[i][j] = j-1;
+      }
+
+      /* New frames: size j starts in state 1<<j after the cheapest frame-ending state (1, 3, 7 or 15) */
+      for(j=0;j<4;j++)
+      {
+         int k;
+         float min_cost;
+         float curr_cost;
+         states[i][1<<j] = 1;
+         min_cost = cost[i-1][1];
+         for(k=1;k<4;k++)
+         {
+            float tmp = cost[i-1][(1<<(k+1))-1];
+            if (tmp < min_cost)
+            {
+               states[i][1<<j] = (1<<(k+1))-1;
+               min_cost = tmp;
+            }
+         }
+         curr_cost = frame_cost+rate*(1<<j)*transient_boost(E+i, E_1+i, j, N-i+1);
+         cost[i][1<<j] = min_cost;
+         /* If part of the frame is outside the analysis window, only count part of the cost */
+         if (N-i < (1<<j))
+            cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j);
+         else
+            cost[i][1<<j] += curr_cost;
+      }
+   }
+
+   best_state=1;
+   best_cost = cost[N-1][1];
+   /* Find best end state (doesn't force a frame to end at N-1) */
+   for (i=2;i<16;i++)
+   {
+      if (cost[N-1][i]<best_cost)
+      {
+         best_cost = cost[N-1][i];
+         best_state = i;
+      }
+   }
+
+   /* Follow transitions back to time 0, where states[0][] holds the frame size index */
+   for (i=N-1;i>=0;i--)
+   {
+      /*printf("%d ", best_state);*/
+      best_state = states[i][best_state];
+   }
+   /*printf("%d\n", best_state);*/
+   return best_state;
+}
+
+void downmix_float(const void *_x, float *sub, int subframe, int offset, int C)
+{
+   /* Mono downmix of float PCM: sub[n] = sum over channels of sample n+offset */
+   const float *pcm = (const float *)_x;
+   int n, ch;
+   for (n=0;n<subframe;n++)
+   {
+      sub[n] = pcm[(n+offset)*C];
+      for (ch=1;ch<C;ch++) sub[n] += pcm[(n+offset)*C+ch];
+   }
+}
+
+void downmix_int(const void *_x, float *sub, int subframe, int offset, int C)
+{
+   /* Mono downmix of 16-bit PCM: sub[n] = sum over channels of sample n+offset */
+   const opus_int16 *pcm = (const opus_int16 *)_x;
+   int n, ch;
+   for (n=0;n<subframe;n++)
+   {
+      sub[n] = pcm[(n+offset)*C];
+      for (ch=1;ch<C;ch++) sub[n] += pcm[(n+offset)*C+ch];
+   }
+}
+/* Returns the frame size index chosen by transient_viterbi() from per-sub-frame (Fs/400 samples) energies of x */
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+                int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
+                downmix_func downmix)
+{
+   int N;
+   int i;
+   float e[MAX_DYNAMIC_FRAMESIZE+4];
+   float e_1[MAX_DYNAMIC_FRAMESIZE+3];
+   float memx;
+   int bestLM=0;
+   int subframe;
+   int pos;
+   int offset=0;
+   VARDECL(opus_val16, sub);
+
+   subframe = Fs/400;
+   ALLOC(sub, subframe, opus_val16);
+   e[0]=mem[0];
+   e_1[0]=1./(EPSILON+mem[0]);
+   if (buffering)
+   {
+      /* Consider the CELT delay when not in restricted-lowdelay */
+      /* We assume the buffering is between 2.5 and 5 ms; mem[1..2] hold the energies saved by the last call */
+      offset = 2*subframe - buffering;
+      celt_assert(offset>=0 && offset <= subframe);
+      len -= offset;
+      e[1]=mem[1];
+      e_1[1]=1./(EPSILON+mem[1]);
+      e[2]=mem[2];
+      e_1[2]=1./(EPSILON+mem[2]);
+      pos = 3;
+   } else {
+      pos=1;
+   }
+   N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
+   memx = 0;
+   for (i=0;i<N;i++)
+   {
+      float tmp;
+      float tmpx;
+      int j;
+      tmp=EPSILON;
+      /* x is only read through downmix(), which knows the real sample type, so the skip is applied there */
+      downmix(x, sub, subframe, i*subframe+offset, C);
+      if (i==0)
+         memx = sub[0];
+      for (j=0;j<subframe;j++)
+      {
+         tmpx = sub[j];
+         tmp += (tmpx-memx)*(tmpx-memx);
+         memx = tmpx;
+      }
+      e[i+pos] = tmp;
+      e_1[i+pos] = 1.f/tmp;
+   }
+   /* Hack to get 20 ms working with APPLICATION_AUDIO
+      The real problem is that the corresponding memory needs to use 1.5 ms
+      from this frame and 1 ms from the next frame */
+   e[i+pos] = e[i+pos-1];
+   if (buffering)
+      N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2);
+   bestLM = transient_viterbi(e, e_1, N, (1.f+.5*tonality)*(40*C+40), bitrate/400);
+   mem[0] = e[1<<bestLM];
+   if (buffering)
+   {
+      mem[1] = e[(1<<bestLM)+1];
+      mem[2] = e[(1<<bestLM)+2];
+   }
+   return bestLM;
+}
+
+#endif
+
+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
+{
+   int selected;
+   /* Anything shorter than Fs/400 samples is rejected outright */
+   if (frame_size<Fs/400)
+      return -1;
+   if (variable_duration == OPUS_FRAMESIZE_ARG)
+      selected = frame_size;
+   else if (variable_duration == OPUS_FRAMESIZE_VARIABLE)
+      selected = Fs/50;
+   else if (variable_duration < OPUS_FRAMESIZE_2_5_MS || variable_duration > OPUS_FRAMESIZE_60_MS)
+      return -1;
+   else
+      selected = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS));
+   /* Never select more samples than were supplied, and only Fs/400, /200, /100, /50, /25 or 3*Fs/50 */
+   if (selected>frame_size)
+      return -1;
+   return (400*selected==Fs || 200*selected==Fs || 100*selected==Fs ||
+            50*selected==Fs || 25*selected==Fs || 50*selected==3*Fs) ? selected : -1;
+}
+
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
-                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth)
+                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
+#ifndef FIXED_POINT
+                , AnalysisInfo *analysis_info
+#endif
+                )
 {
    void *silk_enc;
    CELTEncoder *celt_enc;
@ -563,11 +816,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
    int curr_bandwidth;
    opus_val16 HB_gain;
    opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
-    int extra_buffer, total_buffer;
-    int perform_analysis=0;
-#ifndef FIXED_POINT
-    AnalysisInfo analysis_info;
-#endif
+    int total_buffer;
    VARDECL(opus_val16, tmp_prefill);

    ALLOC_STACK;
@ -575,36 +824,37 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
    max_data_bytes = IMIN(1276, out_data_bytes);

    st->rangeFinal = 0;
-    if (400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
+    if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
         50*frame_size != st->Fs &&  25*frame_size != st->Fs &&  50*frame_size != 3*st->Fs)
-    {
-       RESTORE_STACK;
-       return OPUS_BAD_ARG;
-    }
-    if (max_data_bytes<=0)
+         || (400*frame_size < st->Fs)
+         || max_data_bytes<=0
+         )
    {
       RESTORE_STACK;
       return OPUS_BAD_ARG;
    }
    silk_enc = (char*)st+st->silk_enc_offset;
    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
-
-    lsb_depth = IMIN(lsb_depth, st->lsb_depth);
-
-#ifndef FIXED_POINT
-    perform_analysis = st->silk_mode.complexity >= 7 && frame_size >= st->Fs/100 && st->Fs==48000;
-#endif
    if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
       delay_compensation = 0;
    else
       delay_compensation = st->delay_compensation;
-    if (perform_analysis)
+
+    lsb_depth = IMIN(lsb_depth, st->lsb_depth);
+
+    st->voice_ratio = -1;
+
+#ifndef FIXED_POINT
+    st->detected_bandwidth = 0;
+    if (analysis_info->valid)
    {
-       total_buffer = IMAX(st->Fs/200, delay_compensation);
-    } else {
-       total_buffer = delay_compensation;
+       if (st->signal_type == OPUS_AUTO)
+          st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob));
+       st->detected_bandwidth = analysis_info->opus_bandwidth;
    }
-    extra_buffer = total_buffer-delay_compensation;
+#endif
+
+    total_buffer = delay_compensation;
    st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);

    frame_rate = st->Fs/frame_size;
@ -916,7 +1166,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
          /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
          if (to_celt && i==nb_frames-1)
             st->user_forced_mode = MODE_CELT_ONLY;
-          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth);
+          tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth
+#ifndef FIXED_POINT
+                , analysis_info
+#endif
+                );
          if (tmp_len<0)
          {
             RESTORE_STACK;
@ -942,7 +1196,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
       RESTORE_STACK;
       return ret;
    }
-
    curr_bandwidth = st->bandwidth;

    /* Chooses the appropriate mode for speech
@ -981,22 +1234,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
       dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
    }

-#ifndef FIXED_POINT
-    if (perform_analysis)
-    {
-       int nb_analysis_frames;
-       nb_analysis_frames = frame_size/(st->Fs/100);
-       for (i=0;i<nb_analysis_frames;i++)
-          tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels, lsb_depth);
-       if (st->signal_type == OPUS_AUTO)
-          st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
-       st->detected_bandwidth = analysis_info.opus_bandwidth;
-    } else {
-       analysis_info.valid = 0;
-       st->voice_ratio = -1;
-       st->detected_bandwidth = 0;
-    }
-#endif
+

    /* SILK processing */
    HB_gain = Q15ONE;
@ -1205,9 +1443,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        } else {
            if (st->use_vbr)
            {
+                opus_int32 bonus=0;
+#ifndef FIXED_POINT
+                if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
+                {
+                   bonus = (40*st->stream_channels+40)*(st->Fs/frame_size-50);
+                   if (analysis_info->valid)
+                      bonus = bonus*(1.f+.5*analysis_info->tonality);
+                }
+#endif
                celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
                celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
-                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
+                celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus));
                nb_compr_bytes = max_data_bytes-1-redundancy_bytes;
            } else {
                nb_compr_bytes = bytes_target;
@ -1222,7 +1469,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
    if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
    {
       for (i=0;i<st->channels*st->Fs/400;i++)
-          tmp_prefill[i] = st->delay_buffer[(extra_buffer+st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
    }

    for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
@ -1236,7 +1483,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
       const CELTMode *celt_mode;

       celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
-       gain_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels,
+       gain_fade(pcm_buf, pcm_buf,
             st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
    }
    st->prev_HB_gain = HB_gain;
@ -1258,7 +1505,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
            g1 *= (1.f/16384);
            g2 *= (1.f/16384);
 #endif
-            stereo_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels, g1, g2, celt_mode->overlap,
+            stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
                  frame_size, st->channels, celt_mode->window, st->Fs);
            st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
        }
@ -1312,7 +1559,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        int err;
        celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
        celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
-        err = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
+        err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
        if (err < 0)
        {
           RESTORE_STACK;
@ -1339,10 +1586,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        if (ec_tell(&enc) <= 8*nb_compr_bytes)
        {
 #ifndef FIXED_POINT
-           if (perform_analysis)
-              celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+           celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));
 #endif
-           ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc);
+           ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
           if (ret < 0)
           {
              RESTORE_STACK;
@ -1365,9 +1611,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));

        /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
-        celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2-N4), N4, dummy, 2, NULL);
+        celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);

-        err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
+        err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
        if (err < 0)
        {
           RESTORE_STACK;
@ -1440,6 +1686,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
   VARDECL(opus_int16, in);
   ALLOC_STACK;

+   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
   if(frame_size<0)
   {
      RESTORE_STACK;
@ -1459,6 +1706,12 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
 opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
                 unsigned char *data, opus_int32 out_data_bytes)
 {
+   /* Resolve the frame-duration setting into the frame length to encode.
+      This function has no ALLOC_STACK, so the error path returns directly
+      instead of going through RESTORE_STACK. */
+   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
+   if(frame_size<0)
+      return OPUS_BAD_ARG;
    return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16);
 }

@ -1467,21 +1720,74 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
      unsigned char *data, opus_int32 max_data_bytes)
 {
   int i, ret;
+   const CELTMode *celt_mode;
+   int delay_compensation;
+   int lsb_depth;
   VARDECL(float, in);
+   AnalysisInfo analysis_info;
   ALLOC_STACK;

+   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+
+   lsb_depth = IMIN(16, st->lsb_depth);
+
+   analysis_info.valid = 0;
+   if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+   {
+      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
+            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info);
+   } else {
+      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
+   }
+   if(frame_size<0)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+
   ALLOC(in, frame_size*st->channels, float);

   for (i=0;i<frame_size*st->channels;i++)
      in[i] = (1.0f/32768)*pcm[i];
-   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16);
+   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info);
   RESTORE_STACK;
   return ret;
 }
 opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
                       unsigned char *data, opus_int32 out_data_bytes)
 {
-   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24);
+   const CELTMode *celt_mode;
+   int delay_compensation;
+   int lsb_depth;
+   AnalysisInfo analysis_info;
+   /* Float-input entry point: settles the frame size, then hands over to opus_encode_native() */
+   opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
+   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+      delay_compensation = 0;
+   else
+      delay_compensation = st->delay_compensation;
+
+   lsb_depth = IMIN(24, st->lsb_depth);
+   /* run_analysis() is only used at complexity>=7 and 48 kHz; otherwise analysis_info keeps valid=0 */
+   analysis_info.valid = 0;
+   if (st->silk_mode.complexity >= 7 && st->Fs==48000)
+   {
+      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
+            frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info);
+   } else {
+      frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
+   }
+   if(frame_size<0)
+   {
+      /* No ALLOC_STACK was executed in this function, so there is no stack state to restore */
+      return OPUS_BAD_ARG;
+   }
+
+   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info);
 
 }
 #endif
@ -1750,6 +2056,18 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
            *value = st->lsb_depth;
        }
        break;
+        case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+        {
+            opus_int32 value = va_arg(ap, opus_int32);
+            st->variable_duration = value;
+        }
+        break;
+        case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+        {
+            opus_int32 *value = va_arg(ap, opus_int32*);
+            *value = st->variable_duration;
+        }
+        break;
        case OPUS_RESET_STATE:
        {
           void *silk_enc;
@ -1779,6 +2097,15 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
            st->user_forced_mode = value;
        }
        break;
+
+        case CELT_GET_MODE_REQUEST:
+        {
+           const CELTMode ** value = va_arg(ap, const CELTMode**);
+           if (value==0)
+              goto bad_arg;
+           celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));
+        }
+        break;
        default:
            /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/
            ret = OPUS_UNIMPLEMENTED;
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@ -36,10 +36,14 @@
 #include <stdarg.h>
 #include "float_cast.h"
 #include "os_support.h"
+#include "analysis.h"

 struct OpusMSEncoder {
+   TonalityAnalysisState analysis; /* analysis state handed to run_analysis() by the encode path */
    ChannelLayout layout;
-   int bitrate;
+   int variable_duration; /* OPUS_SET_EXPERT_FRAME_DURATION value; OPUS_FRAMESIZE_ARG after init */
+   opus_int32 bitrate_bps; /* bitrate request for all streams; may be OPUS_AUTO or OPUS_BITRATE_MAX */
+   opus_val32 subframe_mem[3]; /* NOTE(review): unused in the code visible here; presumably optimize_framesize() memory -- confirm */
    /* Encoder states go here */
 };

@ -102,6 +106,8 @@ int opus_multistream_encoder_init(
   st->layout.nb_streams = streams;
   st->layout.nb_coupled_streams = coupled_streams;

+   st->bitrate_bps = OPUS_AUTO;
+   st->variable_duration = OPUS_FRAMESIZE_ARG;
   for (i=0;i<st->layout.nb_channels;i++)
      st->layout.mapping[i] = mapping[i];
   if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))
@ -182,6 +188,10 @@ static int opus_multistream_encode_native
    unsigned char *data,
    opus_int32 max_data_bytes,
    int lsb_depth
+#ifndef FIXED_POINT
+    , downmix_func downmix
+    , const void *pcm_analysis
+#endif
 )
 {
   opus_int32 Fs;
@ -193,10 +203,43 @@ static int opus_multistream_encode_native
   VARDECL(opus_val16, buf);
   unsigned char tmp_data[MS_FRAME_TMP];
   OpusRepacketizer rp;
+   int orig_frame_size;
+   int coded_channels;
+   opus_int32 channel_rate;
+   opus_int32 complexity;
+   AnalysisInfo analysis_info;
+   const CELTMode *celt_mode;
   ALLOC_STACK;

   ptr = (char*)st + align(sizeof(OpusMSEncoder));
   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
+   opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity));
+   opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));
+
+   if (400*frame_size < Fs)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
+   orig_frame_size = IMIN(frame_size,Fs/50);
+#ifndef FIXED_POINT
+   analysis_info.valid = 0;
+   if (complexity >= 7 && Fs==48000)
+   {
+      opus_int32 delay_compensation;
+      int channels;
+
+      channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
+      opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));
+      delay_compensation -= Fs/400;
+
+      frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis,
+            frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info);
+   } else
+#endif
+   {
+      frame_size = frame_size_select(frame_size, st->variable_duration, Fs);
+   }
   /* Validate frame_size before using it to allocate stack space.
      This mirrors the checks in opus_encode[_float](). */
   if (400*frame_size != Fs && 200*frame_size != Fs &&
@ -215,6 +258,39 @@ static int opus_multistream_encode_native
      RESTORE_STACK;
      return OPUS_BUFFER_TOO_SMALL;
   }
+
+   /* Compute bitrate allocation between streams (this could be a lot better) */
+   coded_channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
+   if (st->bitrate_bps==OPUS_AUTO)
+   {
+      channel_rate = Fs+60*Fs/orig_frame_size;
+   } else if (st->bitrate_bps==OPUS_BITRATE_MAX)
+   {
+      channel_rate = 300000;
+   } else {
+      channel_rate = st->bitrate_bps/coded_channels;
+   }
+#ifndef FIXED_POINT
+   if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)
+   {
+      opus_int32 bonus;
+      bonus = 60*(Fs/frame_size-50);
+      channel_rate += bonus;
+   }
+#endif
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
+   for (s=0;s<st->layout.nb_streams;s++)
+   {
+      OpusEncoder *enc;
+      enc = (OpusEncoder*)ptr;
+      if (s < st->layout.nb_coupled_streams)
+         ptr += align(coupled_size);
+      else
+         ptr += align(mono_size);
+      opus_encoder_ctl(enc, OPUS_SET_BITRATE(channel_rate * (s < st->layout.nb_coupled_streams ? 2 : 1)));
+   }
+
+   ptr = (char*)st + align(sizeof(OpusMSEncoder));
   /* Counting ToC */
   tot_size = 0;
   for (s=0;s<st->layout.nb_streams;s++)
@ -246,7 +322,11 @@ static int opus_multistream_encode_native
      /* Reserve three bytes for the last stream and four for the others */
      curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
      curr_max = IMIN(curr_max,MS_FRAME_TMP);
-      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth);
+      len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth
+#ifndef FIXED_POINT
+            , &analysis_info
+#endif
+            );
      if (len<0)
      {
         RESTORE_STACK;
@ -345,8 +425,9 @@ int opus_multistream_encode_float
    opus_int32 max_data_bytes
 )
 {
+   int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
   return opus_multistream_encode_native(st, opus_copy_channel_in_float,
-      pcm, frame_size, data, max_data_bytes, 24);
+      pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset);
 }

 int opus_multistream_encode(
@ -357,8 +438,9 @@ int opus_multistream_encode(
    opus_int32 max_data_bytes
 )
 {
+   int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
   return opus_multistream_encode_native(st, opus_copy_channel_in_short,
-      pcm, frame_size, data, max_data_bytes, 16);
+      pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset);
 }
 #endif

@ -378,20 +460,10 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
   {
   case OPUS_SET_BITRATE_REQUEST:
   {
-      int chan, s;
      opus_int32 value = va_arg(ap, opus_int32);
-      chan = st->layout.nb_streams + st->layout.nb_coupled_streams;
-      value /= chan;
-      for (s=0;s<st->layout.nb_streams;s++)
-      {
-         OpusEncoder *enc;
-         enc = (OpusEncoder*)ptr;
-         if (s < st->layout.nb_coupled_streams)
-            ptr += align(coupled_size);
-         else
-            ptr += align(mono_size);
-         opus_encoder_ctl(enc, request, value * (s < st->layout.nb_coupled_streams ? 2 : 1));
-      }
+      if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX)
+         goto bad_arg;
+      st->bitrate_bps = value;
   }
   break;
   case OPUS_GET_BITRATE_REQUEST:
@ -504,7 +576,21 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
      }
      *value = (OpusEncoder*)ptr;
   }
-      break;
+   break;
+   case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
+   {
+       opus_int32 value = va_arg(ap, opus_int32);
+       if (value<0 || value>1) /* only 0 and 1 are accepted */
+          goto bad_arg;
+       st->variable_duration = value; /* read back unchanged by OPUS_GET_EXPERT_FRAME_DURATION_REQUEST */
+   }
+   break;
+   case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
+   {
+       opus_int32 *value = va_arg(ap, opus_int32*);
+       *value = st->variable_duration; /* NOTE(review): value is dereferenced without a NULL check; confirm callers never pass NULL */
+   }
+   break;
   default:
      ret = OPUS_UNIMPLEMENTED;
      break;
@ -512,6 +598,9 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)

   va_end(ap);
   return ret;
+bad_arg: /* error exit reached by goto from ctl cases that reject their argument */
+   va_end(ap); /* mirrors the va_end on the normal return path just above */
+   return OPUS_BAD_ARG;
 }

 void opus_multistream_encoder_destroy(OpusMSEncoder *st)
--- a/src/opus_private.h
+++ b/src/opus_private.h
@ -31,6 +31,7 @@

 #include "arch.h"
 #include "opus.h"
+#include "celt.h"

 struct OpusRepacketizer {
   unsigned char toc;
@ -81,11 +82,24 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
 #define OPUS_SET_FORCE_MODE_REQUEST    11002
 #define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)

+typedef void (*downmix_func)(const void *, float *, int, int, int); /* callback type; downmix_float and downmix_int below share this signature: (_x, sub, subframe, offset, C) */
+void downmix_float(const void *_x, float *sub, int subframe, int offset, int C); /* passed as the downmix callback by opus_multistream_encode_float; presumably reads _x as float samples - confirm against the definition */
+void downmix_int(const void *_x, float *sub, int subframe, int offset, int C); /* passed as the downmix callback by opus_multistream_encode; presumably reads _x as 16-bit samples - confirm against the definition */
+
+int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
+                int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
+                downmix_func downmix); /* downmix is a callback of the downmix_func type declared above; meaning of the int result is not visible in this header */

 int encode_size(int size, unsigned char *data);

+opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); /* NOTE(review): presumably maps the requested frame_size to the one actually encoded, given the variable_duration setting and sample rate Fs - confirm against the definition */
+
 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
-      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth);
+      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
+#ifndef FIXED_POINT /* the trailing parameter exists only when FIXED_POINT is not defined */
+                , AnalysisInfo *analysis_info /* call sites must wrap the matching argument in the same guard */
+#endif /* !FIXED_POINT */
+      );

 int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
      opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,