Merge branch 'plc_challenge' into master

2025-06-01 08:07:41 +00:00 · 2022-09-07 00:38:55 -04:00 · 2022-09-07 00:38:55 -04:00 · 60450472a6
commit 60450472a6
parent 340ab3089b c1da818f39
20 changed files with 1501 additions and 48 deletions
--- a/dnn/Makefile.am
+++ b/dnn/Makefile.am
@ -8,6 +8,7 @@ include_HEADERS = include/lpcnet.h
 lib_LTLIBRARIES = liblpcnet.la
 noinst_HEADERS = arch.h  \
 		 burg.h \
 		 common.h  \
 		 freq.h  \
 		 _kiss_fft_guts.h  \
@ -16,6 +17,7 @@ noinst_HEADERS = arch.h  \
 		 lpcnet_private.h \
 		 opus_types.h  \
 		 nnet_data.h  \
 		 plc_data.h  \
 		 nnet.h  \
 		 pitch.h  \
 		 tansig_table.h \
@ -24,6 +26,7 @@ noinst_HEADERS = arch.h  \
 		 vec_neon.h
 liblpcnet_la_SOURCES = \
 	burg.c \
 	common.c \
 	kiss99.c \
 	lpcnet.c \
@ -31,6 +34,7 @@ liblpcnet_la_SOURCES = \
 	lpcnet_enc.c \
 	nnet.c \
 	nnet_data.c \
 	plc_data.c \
 	ceps_codebooks.c \
 	pitch.c \
 	freq.c \
@ -52,7 +56,7 @@ lpcnet_demo_LDADD = liblpcnet.la
 #dump_data_SOURCES = dump_data.c
 #dump_data_LDADD = $(DUMP_OBJ) $(LIBM)
-dump_data_SOURCES = common.c dump_data.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c ceps_codebooks.c
+dump_data_SOURCES = common.c dump_data.c burg.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c ceps_codebooks.c
 dump_data_LDADD = $(LIBM)
 dump_data_CFLAGS = $(AM_CFLAGS)
--- a/dnn/README.md
+++ b/dnn/README.md
@ -3,8 +3,14 @@
 Low complexity implementation of the WaveRNN-based LPCNet algorithm, as described in:
 - J.-M. Valin, J. Skoglund, [LPCNet: Improving Neural Speech Synthesis Through Linear Prediction](https://jmvalin.ca/papers/lpcnet_icassp2019.pdf), *Proc. International Conference on Acoustics, Speech and Signal Processing (ICASSP)*, arXiv:1810.11846, 2019.
 - J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [Neural Speech Synthesis on a Shoestring: Improving the Efficiency of LPCNet](https://jmvalin.ca/papers/improved_lpcnet.pdf), *Proc. ICASSP*, arxiv:2106.04129, 2022.
 - K. Subramani, J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [End-to-end LPCNet: A Neural Vocoder With Fully-Differentiable LPC Estimation](https://jmvalin.ca/papers/lpcnet_end2end.pdf), *Proc. INTERSPEECH*, arxiv:2202.11301, 2022.
 For coding/PLC applications of LPCNet, see:
 - J.-M. Valin, J. Skoglund, [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://jmvalin.ca/papers/lpcnet_codec.pdf), *Proc. INTERSPEECH*, arxiv:1903.12087, 2019.
 - J. Skoglund, J.-M. Valin, [Improving Opus Low Bit Rate Quality with Neural Speech Synthesis](https://jmvalin.ca/papers/opusnet.pdf), *Proc. INTERSPEECH*, arxiv:1905.04628, 2020.
 - J.-M. Valin, A. Mustafa, C. Montgomery, T.B. Terriberry, M. Klingbeil, P. Smaragdis, A. Krishnaswamy, [Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model](https://jmvalin.ca/papers/lpcnet_plc.pdf), *Proc. INTERSPEECH*, arxiv:2205.05785, 2022.
 # Introduction
@ -59,6 +65,22 @@ Alternatively, you can run the uncompressed analysis/synthesis using -features
 instead of -encode and -synthesis instead of -decode.
 The same functionality is available in the form of a library. See include/lpcnet.h for the API.
 To try packet loss concealment (PLC), you first need a PLC model, which you can get with:
 ```
 ./download_model.sh plc-3b1eab4
 ```
 or (for the PLC challenge submission):
 ```
 ./download_model.sh plc_challenge
 ```
 PLC can be tested with:
 ```
 ./lpcnet_demo -plc_file noncausal_dc error_pattern.txt input.pcm output.pcm
 ```
 where error_pattern.txt is a text file with one entry per 20-ms packet, with 1 meaning "packet lost" and 0 meaning "packet not lost".
 noncausal_dc is the non-causal (5-ms look-ahead) PLC with special handling for DC offsets. It's also possible to use "noncausal", "causal",
 or "causal_dc".
 # Training a new model
 This codebase is also meant for research and it is possible to train new models. These are the steps to do that:
--- a/dnn/burg.c
+++ b/dnn/burg.c
@ -0,0 +1,245 @@
 /***********************************************************************
 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 - Redistributions of source code must retain the above copyright notice,
 this list of conditions and the following disclaimer.
 - Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
 - Neither the name of Internet Society, IETF or IETF Trust, nor the
 names of specific contributors, may be used to endorse or promote
 products derived from this software without specific prior written
 permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 #include <math.h>
 #include <string.h>
 #include <assert.h>
 #include "burg.h"
 #define MAX_FRAME_SIZE              384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
 #define SILK_MAX_ORDER_LPC          16
 #define FIND_LPC_COND_FAC           1e-5f
 /* Energy of a float vector: sum of data[k]^2 over the first dataSize samples, */
 /* accumulated and returned in double precision                               */
 static double silk_energy_FLP(
    const float    *data,
    int            dataSize
 )
 {
    int pos = 0;
    double energy = 0.0;
    /* Main part: consume four samples per pass */
    while( pos < dataSize - 3 ) {
        const float *blk = data + pos;
        energy += blk[ 0 ] * (double)blk[ 0 ] +
                  blk[ 1 ] * (double)blk[ 1 ] +
                  blk[ 2 ] * (double)blk[ 2 ] +
                  blk[ 3 ] * (double)blk[ 3 ];
        pos += 4;
    }
    /* Tail: the 0 to 3 samples that did not fill a group of four */
    while( pos < dataSize ) {
        energy += data[ pos ] * (double)data[ pos ];
        pos++;
    }
    assert( energy >= 0.0 );
    return energy;
 }
 /* Dot product of the first dataSize samples of two float vectors, */
 /* accumulated and returned in double precision                    */
 static double silk_inner_product_FLP(
    const float    *data1,
    const float    *data2,
    int            dataSize
 )
 {
    int pos = 0;
    double dot = 0.0;
    /* Main part: consume four sample pairs per pass */
    while( pos < dataSize - 3 ) {
        const float *u = data1 + pos;
        const float *v = data2 + pos;
        dot += u[ 0 ] * (double)v[ 0 ] +
               u[ 1 ] * (double)v[ 1 ] +
               u[ 2 ] * (double)v[ 2 ] +
               u[ 3 ] * (double)v[ 3 ];
        pos += 4;
    }
    /* Tail: the 0 to 3 pairs that did not fill a group of four */
    while( pos < dataSize ) {
        dot += data1[ pos ] * (double)data2[ pos ];
        pos++;
    }
    return dot;
 }
 /* Burg-style LPC analysis over nb_subfr stacked subframes of subfr_length samples.   */
 /* Writes D prediction coefficients to A[] (the negated internal Af[] taps) and       */
 /* returns the residual energy. The order recursion stops early, zeroing the          */
 /* remaining taps, as soon as the inverse prediction gain would reach minInvGain.     */
 /* Preconditions (checked with assert): subfr_length * nb_subfr <= MAX_FRAME_SIZE,    */
 /* D <= SILK_MAX_ORDER_LPC and D <= subfr_length.                                      */
 float silk_burg_analysis(              /* O    returns residual energy                                     */
    float          A[],                /* O    prediction coefficients (length order)                      */
    const float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
    const float    minInvGain,         /* I    minimum inverse prediction gain                             */
    const int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
    const int      nb_subfr,           /* I    number of subframes stacked in x                            */
    const int      D                   /* I    order                                                       */
 )
 {
    int         k, n, s, reached_max_gain;
    double           C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
    const float *x_ptr;
    double           C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
    double           CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
    double           Af[ SILK_MAX_ORDER_LPC ];
    assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
    /* The work arrays above hold SILK_MAX_ORDER_LPC (+1) entries and are indexed up to D */
    assert( D <= SILK_MAX_ORDER_LPC );
    /* Each subframe is indexed up to D samples in from both of its ends */
    assert( D <= subfr_length );
    /* Compute autocorrelations, added over subframes */
    C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
    memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
    for( s = 0; s < nb_subfr; s++ ) {
        x_ptr = x + s * subfr_length;
        for( n = 1; n < D + 1; n++ ) {
            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
        }
    }
    memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
    /* Initialize */
    /* The energy term is inflated by FIND_LPC_COND_FAC * C0 plus a small constant, so it is non-zero even for an all-zero input */
    CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
    invGain = 1.0f;
    reached_max_gain = 0;
    /* Order recursion: iteration n produces Af[ 0..n ]. Af[ k ] is only ever read for k < n, */
    /* i.e. after an earlier iteration has written it, so Af needs no initialization          */
    for( n = 0; n < D; n++ ) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            tmp1 = x_ptr[ n ];
            tmp2 = x_ptr[ subfr_length - n - 1 ];
            for( k = 0; k < n; k++ ) {
                C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
                C_last_row[ k ]  -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
                Atmp = Af[ k ];
                tmp1 += x_ptr[ n - k - 1 ] * Atmp;
                tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
            }
            for( k = 0; k <= n; k++ ) {
                CAf[ k ] -= tmp1 * x_ptr[ n - k ];
                CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
            }
        }
        tmp1 = C_first_row[ n ];
        tmp2 = C_last_row[ n ];
        for( k = 0; k < n; k++ ) {
            Atmp = Af[ k ];
            tmp1 += C_last_row[  n - k - 1 ] * Atmp;
            tmp2 += C_first_row[ n - k - 1 ] * Atmp;
        }
        CAf[ n + 1 ] = tmp1;
        CAb[ n + 1 ] = tmp2;
        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
        num = CAb[ n + 1 ];
        nrg_b = CAb[ 0 ];
        nrg_f = CAf[ 0 ];
        for( k = 0; k < n; k++ ) {
            Atmp = Af[ k ];
            num   += CAb[ n - k ] * Atmp;
            nrg_b += CAb[ k + 1 ] * Atmp;
            nrg_f += CAf[ k + 1 ] * Atmp;
        }
        assert( nrg_f > 0.0 );
        assert( nrg_b > 0.0 );
        /* Calculate the next order reflection (parcor) coefficient */
        rc = -2.0 * num / ( nrg_f + nrg_b );
        assert( rc > -1.0 && rc < 1.0 );
        /* Update inverse prediction gain */
        tmp1 = invGain * ( 1.0 - rc * rc );
        if( tmp1 <= minInvGain ) {
            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
            rc = sqrt( 1.0 - minInvGain / invGain );
            if( num > 0 ) {
                /* Ensure adjusted reflection coefficients has the original sign */
                rc = -rc;
            }
            invGain = minInvGain;
            reached_max_gain = 1;
        } else {
            invGain = tmp1;
        }
        /* Update the AR coefficients */
        for( k = 0; k < (n + 1) >> 1; k++ ) {
            tmp1 = Af[ k ];
            tmp2 = Af[ n - k - 1 ];
            Af[ k ]         = tmp1 + rc * tmp2;
            Af[ n - k - 1 ] = tmp2 + rc * tmp1;
        }
        Af[ n ] = rc;
        if( reached_max_gain ) {
            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
            for( k = n + 1; k < D; k++ ) {
                Af[ k ] = 0.0;
            }
            break;
        }
        /* Update C * Af and C * Ab */
        for( k = 0; k <= n + 1; k++ ) {
            tmp1 = CAf[ k ];
            CAf[ k ]          += rc * CAb[ n - k + 1 ];
            CAb[ n - k + 1  ] += rc * tmp1;
        }
    }
    if( reached_max_gain ) {
        /* CAf was not updated for the final order (the loop exited via break), so the residual is estimated from C0 and invGain */
        /* Convert to float */
        for( k = 0; k < D; k++ ) {
            A[ k ] = (float)( -Af[ k ] );
        }
        /* Subtract energy of preceding samples from C0 */
        for( s = 0; s < nb_subfr; s++ ) {
            C0 -= silk_energy_FLP( x + s * subfr_length, D );
        }
        /* Approximate residual energy */
        nrg_f = C0 * invGain;
    } else {
        /* Compute residual energy and store coefficients as float */
        nrg_f = CAf[ 0 ];
        tmp1 = 1.0;
        for( k = 0; k < D; k++ ) {
            Atmp = Af[ k ];
            nrg_f += CAf[ k + 1 ] * Atmp;
            tmp1  += Atmp * Atmp;
            A[ k ] = (float)(-Atmp);
        }
        /* Remove the FIND_LPC_COND_FAC * C0 inflation added at initialization, scaled by the squared norm of [ 1, Af ] */
        nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
    }
    /* Return residual energy */
    return (float)nrg_f;
 }
--- a/dnn/burg.h
+++ b/dnn/burg.h
@ -0,0 +1,36 @@
 /***********************************************************************
 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 - Redistributions of source code must retain the above copyright notice,
 this list of conditions and the following disclaimer.
 - Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
 - Neither the name of Internet Society, IETF or IETF Trust, nor the
 names of specific contributors, may be used to endorse or promote
 products derived from this software without specific prior written
 permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 #ifndef BURG_H
 #define BURG_H
 /* LPC analysis of the stacked subframes in x[]: writes D prediction coefficients */
 /* to A[] and returns the residual energy                                         */
 float silk_burg_analysis(              /* O    returns residual energy                                     */
    float          A[],                /* O    prediction coefficients (length order)                      */
    const float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
    const float    minInvGain,         /* I    minimum inverse prediction gain                             */
    const int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
    const int      nb_subfr,           /* I    number of subframes stacked in x                            */
    const int      D                   /* I    order                                                       */
 );
 #endif /* BURG_H */
--- a/dnn/download_model.sh
+++ b/dnn/download_model.sh
@ -9,4 +9,5 @@ if [ ! -f $model ]; then
 fi
 tar xvf $model
 touch src/nnet_data.[ch]
 touch src/plc_data.[ch]
 mv src/*.[ch] .
--- a/dnn/dump_data.c
+++ b/dnn/dump_data.c
@ -138,9 +138,18 @@ int main(int argc, char **argv) {
  int encode = 0;
  int decode = 0;
  int quantize = 0;
  int burg = 0;
  srand(getpid());
  st = lpcnet_encoder_create();
  argv0=argv[0];
  if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
      burg = 1;
      training = 1;
  }
  if (argc == 4 && strcmp(argv[1], "-btest")==0) {
      burg = 1;
      training = 0;
  }
  if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
  if (argc == 5 && strcmp(argv[1], "-qtrain")==0) {
      training = 1;
@ -236,7 +245,8 @@ int main(int argc, char **argv) {
    if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
    if (training && ++gain_change_count > 2821) {
      float tmp, tmp2;
-      speech_gain = pow(10., (-20+(rand()%40))/20.);
+      speech_gain = pow(10., (-30+(rand()%40))/20.);
      if (rand()&1) speech_gain = -speech_gain;
      if (rand()%20==0) speech_gain *= .01;
      if (rand()%100==0) speech_gain = 0;
      gain_change_count = 0;
@ -247,13 +257,18 @@ int main(int argc, char **argv) {
    }
    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
    preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    for (i=0;i<FRAME_SIZE;i++) {
      float g;
      float f = (float)i/FRAME_SIZE;
      g = f*speech_gain + (1-f)*old_speech_gain;
      x[i] *= g;
    }
    if (burg) {
      float ceps[2*NB_BANDS];
      burg_cepstral_analysis(ceps, x);
      fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
    }
    preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
    /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
    for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
--- a/dnn/freq.c
+++ b/dnn/freq.c
@ -37,6 +37,7 @@
 #include "freq.h"
 #include "pitch.h"
 #include "arch.h"
 #include "burg.h"
 #include <assert.h>
 #define SQUARE(x) ((x)*(x))
@ -58,6 +59,32 @@ typedef struct {
 } CommonState;
 void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
      tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
      tmp = 1.f/(tmp + 1e-9);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
 }
 float _lpcnet_lpc(
      opus_val16 *lpc, /* out: [0...p-1] LPC coefficients      */
      opus_val16 *rc,
@ -128,6 +155,54 @@ void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  }
 }
 /* Computes an NB_BANDS-coefficient cepstrum of `len` samples of pcm from a Burg
    LPC fit of the given order.
    Steps: first-order difference with PREEMPHASIS (len-1 samples), Burg analysis,
    transform of the (bandwidth-expanded) prediction filter, per-band inverse
    energy of that transform scaled by the normalized residual energy, floored
    log10, then DCT. Requires order <= LPC_ORDER and len <= FRAME_SIZE. */
 void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order) {
  int i;
  float burg_in[FRAME_SIZE];
  float burg_lpc[LPC_ORDER];
  float x[WINDOW_SIZE];
  float Eburg[NB_BANDS];
  float g;
  kiss_fft_cpx LPC[FREQ_SIZE];
  float Ly[NB_BANDS];
  float logMax = -2;
  float follow = -2;
  assert(order <= LPC_ORDER);
  assert(len <= FRAME_SIZE);
  for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
  g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
  /* Normalize the residual energy returned by the Burg analysis. */
  g /= len - 2*(order-1);
  /* Build the filter [1, -a_1*.995, -a_2*.995^2, ...], zero-padded to the window size. */
  RNN_CLEAR(x, WINDOW_SIZE);
  x[0] = 1;
  for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
  forward_transform(LPC, x);
  compute_band_energy_inverse(Eburg, LPC);
  for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
  for (i=0;i<NB_BANDS;i++) {
    Ly[i] = log10(1e-2+Eburg[i]);
    /* Floor each band at 8 below the running maximum and 2.5 below the follower. */
    Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
    logMax = MAX16(logMax, Ly[i]);
    follow = MAX16(follow-2.5, Ly[i]);
  }
  dct(burg_cepstrum, Ly);
  burg_cepstrum[0] += - 4;
 }
 /* Runs compute_burg_cepstrum() on each half of the FRAME_SIZE-sample frame x and
    stores, per coefficient, the mean of the two halves in ceps[0..NB_BANDS-1] and
    their difference (first minus second) in ceps[NB_BANDS..2*NB_BANDS-1]. */
 void burg_cepstral_analysis(float *ceps, const float *x) {
  int band;
  float *first = ceps;
  float *second = ceps + NB_BANDS;
  compute_burg_cepstrum(x, first, FRAME_SIZE/2, LPC_ORDER);
  compute_burg_cepstrum(x + FRAME_SIZE/2, second, FRAME_SIZE/2, LPC_ORDER);
  for (band=0; band<NB_BANDS; band++) {
    const float a = first[band];
    const float b = second[band];
    first[band] = .5*(a+b);
    second[band] = (a-b);
  }
 }
 void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
  int i;
  float sum[NB_BANDS] = {0};
--- a/dnn/freq.h
+++ b/dnn/freq.h
@ -47,6 +47,8 @@
 void compute_band_energy(float *bandE, const kiss_fft_cpx *X);
 void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P);
 void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order);
 void burg_cepstral_analysis(float *ceps, const float *x);
 void apply_window(float *x);
 void dct(float *out, const float *in);
--- a/dnn/include/lpcnet.h
+++ b/dnn/include/lpcnet.h
@ -176,11 +176,18 @@ LPCNET_EXPORT void lpcnet_destroy(LPCNetState *st);
  */
 LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *st, const float *features, short *output, int N);
 #define LPCNET_PLC_CAUSAL 0
 #define LPCNET_PLC_NONCAUSAL 1
 #define LPCNET_PLC_CODEC 2
 #define LPCNET_PLC_DC_FILTER 4
 LPCNET_EXPORT int lpcnet_plc_get_size(void);
-LPCNET_EXPORT void lpcnet_plc_init(LPCNetPLCState *st);
+LPCNET_EXPORT int lpcnet_plc_init(LPCNetPLCState *st, int options);
-LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create(void);
+LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create(int options);
 LPCNET_EXPORT void lpcnet_plc_destroy(LPCNetPLCState *st);
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@ -98,7 +98,6 @@ void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b
    compute_conv1d(&feature_conv1, conv1_out, net->feature_conv1_state, in);
    if (lpcnet->frame_count < FEATURE_CONV1_DELAY) RNN_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE);
    compute_conv1d(&feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
    celt_assert(FRAME_INPUT_SIZE == FEATURE_CONV2_OUT_SIZE);
    if (lpcnet->frame_count < FEATURES_DELAY) RNN_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE);
    _lpcnet_compute_dense(&feature_dense1, dense1_out, conv2_out);
    _lpcnet_compute_dense(&feature_dense2, condition, dense1_out);
@ -196,8 +195,12 @@ void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int
        last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
        pred_ulaw = lin2ulaw(pred);
        exc = run_sample_network(&lpcnet->nnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
-        if (i < preload) exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);
+        if (i < preload) {
-        pcm = pred + ulaw2lin(exc);
+          exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);
          pcm = output[i]-PREEMPH*lpcnet->deemph_mem;
        } else {
          pcm = pred + ulaw2lin(exc);
        }
        RNN_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
        lpcnet->last_sig[0] = pcm;
        lpcnet->last_exc = exc;
@ -205,7 +208,7 @@ void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int
        lpcnet->deemph_mem = pcm;
        if (pcm<-32767) pcm = -32767;
        if (pcm>32767) pcm = 32767;
-        output[i] = (int)floor(.5 + pcm);
+        if (i >= preload) output[i] = (int)floor(.5 + pcm);
    }
 }
--- a/dnn/lpcnet_demo.c
+++ b/dnn/lpcnet_demo.c
@ -40,40 +40,70 @@
 #define MODE_SYNTHESIS 3
 #define MODE_PLC 4
 /* Prints the command-line synopsis to stderr and terminates with exit status 1.
    The -plc_file form takes the loss-pattern text file where -plc takes a loss
    percentage. */
 void usage(void) {
    fprintf(stderr, "usage: lpcnet_demo -encode <input.pcm> <compressed.lpcnet>\n");
    fprintf(stderr, "       lpcnet_demo -decode <compressed.lpcnet> <output.pcm>\n");
    fprintf(stderr, "       lpcnet_demo -features <input.pcm> <features.f32>\n");
    fprintf(stderr, "       lpcnet_demo -synthesis <features.f32> <output.pcm>\n");
    fprintf(stderr, "       lpcnet_demo -plc <plc_options> <percent> <input.pcm> <output.pcm>\n");
    fprintf(stderr, "       lpcnet_demo -plc_file <plc_options> <error_pattern.txt> <input.pcm> <output.pcm>\n\n");
    fprintf(stderr, "  plc_options:\n");
    fprintf(stderr, "       causal:       normal (causal) PLC\n");
    fprintf(stderr, "       causal_dc:    normal (causal) PLC with DC offset compensation\n");
    fprintf(stderr, "       noncausal:    non-causal PLC\n");
    fprintf(stderr, "       noncausal_dc: non-causal PLC with DC offset compensation\n");
    exit(1);
 }
 int main(int argc, char **argv) {
    int mode;
    int plc_percent=0;
    FILE *fin, *fout;
-    if (argc != 4 && !(argc == 5 && strcmp(argv[1], "-plc") == 0))
+    FILE *plc_file = NULL;
-    {
+    const char *plc_options;
-        fprintf(stderr, "usage: lpcnet_demo -encode <input.pcm> <compressed.lpcnet>\n");
+    int plc_flags=-1;
-        fprintf(stderr, "       lpcnet_demo -decode <compressed.lpcnet> <output.pcm>\n");
+    if (argc < 4) usage();
        fprintf(stderr, "       lpcnet_demo -features <input.pcm> <features.f32>\n");
        fprintf(stderr, "       lpcnet_demo -synthesis <features.f32> <output.pcm>\n");
        fprintf(stderr, "       lpcnet_demo -plc <percent> <input.pcm> <output.pcm>\n");
        return 0;
    }
    if (strcmp(argv[1], "-encode") == 0) mode=MODE_ENCODE;
    else if (strcmp(argv[1], "-decode") == 0) mode=MODE_DECODE;
    else if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
    else if (strcmp(argv[1], "-synthesis") == 0) mode=MODE_SYNTHESIS;
    else if (strcmp(argv[1], "-plc") == 0) {
        mode=MODE_PLC;
-        plc_percent = atoi(argv[2]);
+        plc_options = argv[2];
-        argv++;
+        plc_percent = atoi(argv[3]);
        argv+=2;
        argc-=2;
    } else if (strcmp(argv[1], "-plc_file") == 0) {
        mode=MODE_PLC;
        plc_options = argv[2];
        plc_file = fopen(argv[3], "r");
        if (!plc_file) {
            fprintf(stderr, "Can't open %s\n", argv[3]);
            exit(1);
        }
        argv+=2;
        argc-=2;
    } else {
-        exit(1);
+        usage();
    }
    if (mode == MODE_PLC) {
        if (strcmp(plc_options, "causal")==0) plc_flags = LPCNET_PLC_CAUSAL;
        else if (strcmp(plc_options, "causal_dc")==0) plc_flags = LPCNET_PLC_CAUSAL | LPCNET_PLC_DC_FILTER;
        else if (strcmp(plc_options, "noncausal")==0) plc_flags = LPCNET_PLC_NONCAUSAL;
        else if (strcmp(plc_options, "noncausal_dc")==0) plc_flags = LPCNET_PLC_NONCAUSAL | LPCNET_PLC_DC_FILTER;
        else usage();
    }
    if (argc != 4) usage();
    fin = fopen(argv[2], "rb");
    if (fin == NULL) {
-	fprintf(stderr, "Can't open %s\n", argv[2]);
+        fprintf(stderr, "Can't open %s\n", argv[2]);
-	exit(1);
+        exit(1);
    }
    fout = fopen(argv[3], "wb");
    if (fout == NULL) {
-	fprintf(stderr, "Can't open %s\n", argv[3]);
+        fprintf(stderr, "Can't open %s\n", argv[3]);
-	exit(1);
+        exit(1);
    }
    if (mode == MODE_ENCODE) {
@ -131,21 +161,31 @@ int main(int argc, char **argv) {
        }
        lpcnet_destroy(net);
    } else if (mode == MODE_PLC) {
        short pcm[FRAME_SIZE];
        int count=0;
        int loss=0;
        int skip=0, extra=0;
        if ((plc_flags&0x3) == LPCNET_PLC_NONCAUSAL) skip=extra=80;
        LPCNetPLCState *net;
-        net = lpcnet_plc_create();
+        net = lpcnet_plc_create(plc_flags);
        while (1) {
            short pcm[FRAME_SIZE];
            size_t ret;
            ret = fread(pcm, sizeof(pcm[0]), FRAME_SIZE, fin);
            if (feof(fin) || ret != FRAME_SIZE) break;
-            if (count % 2 == 0) loss = rand() < RAND_MAX*(float)plc_percent/100.f;
+            if (count % 2 == 0) {
              if (plc_file != NULL) fscanf(plc_file, "%d", &loss);
              else loss = rand() < RAND_MAX*(float)plc_percent/100.f;
            }
            if (loss) lpcnet_plc_conceal(net, pcm);
            else lpcnet_plc_update(net, pcm);
-            fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);
+            fwrite(&pcm[skip], sizeof(pcm[0]), FRAME_SIZE-skip, fout);
            skip = 0;
            count++;
        }
        if (extra) {
          lpcnet_plc_conceal(net, pcm);
          fwrite(pcm, sizeof(pcm[0]), extra, fout);
        }
        lpcnet_plc_destroy(net);
    } else {
        fprintf(stderr, "unknown action\n");
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@ -28,6 +28,10 @@
 #include "config.h"
 #endif
 #ifdef OPUS_BUILD
 #define celt_pitch_xcorr celt_pitch_xcorr_c
 #endif
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@ -540,7 +544,7 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
      ener = (1 + ener0 + celt_inner_prod(&st->exc_buf[i+off], &st->exc_buf[i+off], FRAME_SIZE/2));
      st->xc[2+2*st->pcount+sub][i] = 2*xcorr[i] / ener;
    }
-    if (0) {
+    if (1) {
      /* Upsample correlation by 3x and keep the max. */
      float interpolated[PITCH_MAX_PERIOD]={0};
      /* interp=sinc([-3:3]+1/3).*(.5+.5*cos(pi*[-3:3]/4.5)); interp=interp/sum(interp); */
--- a/dnn/lpcnet_plc.c
+++ b/dnn/lpcnet_plc.c
@ -30,24 +30,48 @@
 #include "lpcnet_private.h"
 #include "lpcnet.h"
 #include "plc_data.h"
 /* Returns sizeof(LPCNetPLCState) in bytes. Declared with (void) so the
    definition matches the exported prototype. */
 LPCNET_EXPORT int lpcnet_plc_get_size(void) {
  return sizeof(LPCNetPLCState);
 }
-LPCNET_EXPORT void lpcnet_plc_init(LPCNetPLCState *st) {
+LPCNET_EXPORT int lpcnet_plc_init(LPCNetPLCState *st, int options) {
  if (FEATURES_DELAY != 0) {
    fprintf(stderr, "PLC cannot work with non-zero FEATURES_DELAY\n");
    fprintf(stderr, "Recompile with a no-lookahead model (see README.md)\n");
    exit(1);
  }
  RNN_CLEAR(st, 1);
  lpcnet_init(&st->lpcnet);
  lpcnet_encoder_init(&st->enc);
  RNN_CLEAR(st->pcm, PLC_BUF_SIZE);
  st->pcm_fill = PLC_BUF_SIZE;
  st->skip_analysis = 0;
  st->blend = 0;
  st->loss_count = 0;
  st->dc_mem = 0;
  st->queued_update = 0;
  if ((options&0x3) == LPCNET_PLC_CAUSAL) {
    st->enable_blending = 1;
    st->non_causal = 0;
  } else if ((options&0x3) == LPCNET_PLC_NONCAUSAL) {
    st->enable_blending = 1;
    st->non_causal = 1;
  } else if ((options&0x3) == LPCNET_PLC_CODEC) {
    st->enable_blending = 0;
    st->non_causal = 0;
  } else {
    return -1;
  }
  st->remove_dc = !!(options&LPCNET_PLC_DC_FILTER);
  return 0;
 }
-LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create() {
+LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create(int options) {
  LPCNetPLCState *st;
-  st = malloc(sizeof(*st));
+  st = calloc(sizeof(*st), 1);
-  lpcnet_plc_init(st);
+  lpcnet_plc_init(st, options);
  return st;
 }
@ -55,20 +79,73 @@ LPCNET_EXPORT void lpcnet_plc_destroy(LPCNetPLCState *st) {
  free(st);
 }
-LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
+static void compute_plc_pred(PLCNetState *net, float *out, const float *in) {
  /* Runs one step of the PLC prediction network: plc_dense1 -> plc_gru1 ->
     plc_gru2 -> plc_out. `in` is the dense layer input and `out` receives the
     output of plc_out. The GRU states stored in `net` are passed to
     compute_gruB() as its state argument. */
  /* All-zero buffer passed as the gru_b_condition argument of compute_gruB(). */
  float zeros[3*PLC_MAX_RNN_NEURONS] = {0};
  float dense_out[PLC_DENSE1_OUT_SIZE];
  _lpcnet_compute_dense(&plc_dense1, dense_out, in);
  compute_gruB(&plc_gru1, zeros, net->plc_gru1_state, dense_out);
  compute_gruB(&plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);
  _lpcnet_compute_dense(&plc_out, out, net->plc_gru2_state);
  /* Artificially boost the correlation to make harmonics cleaner. */
  /* The boosted value (out[19] + 0.1) is capped at 0.5. */
  out[19] = MIN16(.5f, out[19]+.1f);
 }
 void clear_state(LPCNetPLCState *st) {
  /* Reset the synthesis memories of the embedded LPCNet state: both GRU
     states, the last-signal history, the de-emphasis memory and the last
     excitation (set to the mu-law code of 0). */
  RNN_CLEAR(st->lpcnet.nnet.gru_a_state, GRU_A_STATE_SIZE);
  RNN_CLEAR(st->lpcnet.nnet.gru_b_state, GRU_B_STATE_SIZE);
  RNN_CLEAR(st->lpcnet.last_sig, LPC_ORDER);
  st->lpcnet.deemph_mem = 0;
  st->lpcnet.last_exc = lin2ulaw(0.f);
 }
 #define DC_CONST 0.003
 /* In this causal version of the code, the DNN model implemented by compute_plc_pred()
   needs to generate two feature vectors to conceal the first lost packet.*/
 static int lpcnet_plc_update_causal(LPCNetPLCState *st, short *pcm) {
  int i;
  float x[FRAME_SIZE];
  short output[FRAME_SIZE];
  float plc_features[2*NB_BANDS+NB_FEATURES+1];
  short lp[FRAME_SIZE]={0};
  int delta = 0;
  if (st->remove_dc) {
    st->dc_mem += st->syn_dc;
    delta = st->syn_dc;
    st->syn_dc = 0;
    for (i=0;i<FRAME_SIZE;i++) {
      lp[i] = (int)floor(.5 + st->dc_mem);
      st->dc_mem += DC_CONST*(pcm[i] - st->dc_mem);
      pcm[i] -= lp[i];
    }
  }
  for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
  burg_cepstral_analysis(plc_features, x);
  st->enc.pcount = 0;
  if (st->skip_analysis) {
    /*fprintf(stderr, "skip update\n");*/
    if (st->blend) {
      short tmp[FRAME_SIZE-TRAINING_OFFSET];
-      lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, 0);
+      float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
-      for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) {
+      RNN_COPY(zeros, plc_features, 2*NB_BANDS);
-        float w;
+      zeros[2*NB_BANDS+NB_FEATURES] = 1;
-        w = .5 - .5*cos(M_PI*i/(FRAME_SIZE-TRAINING_OFFSET));
+      st->plc_net = st->plc_copy;
-        pcm[i] = (int)floor(.5 + w*pcm[i] + (1-w)*tmp[i]);
+      compute_plc_pred(&st->plc_net, st->features, zeros);
      if (st->enable_blending) {
        LPCNetState copy;
        copy = st->lpcnet;
        lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], tmp, FRAME_SIZE-TRAINING_OFFSET, 0);
        for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) {
          float w;
          w = .5 - .5*cos(M_PI*i/(FRAME_SIZE-TRAINING_OFFSET));
          pcm[i] = (int)floor(.5 + w*pcm[i] + (1-w)*(tmp[i]-delta));
        }
        st->lpcnet = copy;
        lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
      } else {
        RNN_COPY(tmp, pcm, FRAME_SIZE-TRAINING_OFFSET);
        lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
      }
      st->blend = 0;
      RNN_COPY(st->pcm, &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
@ -93,18 +170,28 @@ LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
    run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->enc.features[0]);
    st->skip_analysis--;
  } else {
    RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
    plc_features[2*NB_BANDS+NB_FEATURES] = 1;
    compute_plc_pred(&st->plc_net, st->features, plc_features);
    for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE+i] = pcm[i];
    RNN_COPY(output, &st->pcm[0], FRAME_SIZE);
    lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE);
    RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
  }
-  RNN_COPY(st->features, st->enc.features[0], NB_TOTAL_FEATURES);
+  st->loss_count = 0;
  if (st->remove_dc) {
    for (i=0;i<FRAME_SIZE;i++) {
      pcm[i] += lp[i];
    }
  }
  return 0;
 }
-LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
+static const float att_table[10] = {0, 0,  -.2, -.2,  -.4, -.4,  -.8, -.8, -1.6, -1.6};
 static int lpcnet_plc_conceal_causal(LPCNetPLCState *st, short *pcm) {
  int i;
  short output[FRAME_SIZE];
  float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
  st->enc.pcount = 0;
  /* If we concealed the previous frame, finish synthesizing the rest of the samples. */
  /* FIXME: Copy/predict features. */
@ -113,16 +200,20 @@ LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
    int update_count;
    update_count = IMIN(st->pcm_fill, FRAME_SIZE);
    RNN_COPY(output, &st->pcm[0], update_count);
-
+    compute_plc_pred(&st->plc_net, st->features, zeros);
    lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], output, update_count, update_count);
    RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
    st->pcm_fill -= update_count;
    st->skip_analysis++;
  }
  st->plc_copy = st->plc_net;
  lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0);
  compute_plc_pred(&st->plc_net, st->features, zeros);
  if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
  else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
  //if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
  lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0);
  {
    int i;
    float x[FRAME_SIZE];
    /* FIXME: Can we do better? */
    for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
@ -130,6 +221,175 @@ LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
    compute_frame_features(&st->enc, x);
    process_single_frame(&st->enc, NULL);
  }
  st->loss_count++;
  st->blend = 1;
  if (st->remove_dc) {
    for (i=0;i<FRAME_SIZE;i++) {
      st->syn_dc += DC_CONST*(pcm[i] - st->syn_dc);
      pcm[i] += (int)floor(.5 + st->dc_mem);
    }
  }
  return 0;
 }
 /* In this non-causal version of the code, the DNN model implemented by compute_plc_pred()
   is always called once per frame. We process audio up to the current position minus TRAINING_OFFSET. */
 void process_queued_update(LPCNetPLCState *st) {
  /* Nothing to do unless a synthesis update was deferred by a previous call. */
  if (!st->queued_update) return;
  /* Run the deferred update over the queued frame, then clear the flag. */
  lpcnet_synthesize_impl(&st->lpcnet, st->features, st->queued_samples, FRAME_SIZE, FRAME_SIZE);
  st->queued_update = 0;
 }
 /* Non-causal update with one received frame of FRAME_SIZE samples in `pcm`.
    On return `pcm` holds the frame to output: the samples kept in st->pcm from
    offset TRAINING_OFFSET onwards, followed by the first TRAINING_OFFSET samples
    of the (DC-removed) input; the input frame itself is saved in st->pcm.
    When the previous frame(s) were concealed (loss_count > 0), the tail of the
    concealed audio held in st->pcm is cross-faded with a synthesis of the
    time-reversed received frame. Always returns 0. */
 static int lpcnet_plc_update_non_causal(LPCNetPLCState *st, short *pcm) {
  int i;
  float x[FRAME_SIZE];
  short pcm_save[FRAME_SIZE];
  float plc_features[2*NB_BANDS+NB_FEATURES+1];
  short lp[FRAME_SIZE]={0};
  double mem_bak=0;
  /* NOTE(review): syn_dc is a double, so this initialization converts it to int. */
  int delta = st->syn_dc;
  process_queued_update(st);
  if (st->remove_dc) {
    /* Fold the synthesis DC estimate into dc_mem, then subtract the tracked DC
       (rounded to an integer, kept in lp[]) from every input sample. */
    st->dc_mem += st->syn_dc;
    st->syn_dc = 0;
    mem_bak = st->dc_mem;
    for (i=0;i<FRAME_SIZE;i++) {
      lp[i] = (int)floor(.5 + st->dc_mem);
      st->dc_mem += DC_CONST*(pcm[i] - st->dc_mem);
      pcm[i] -= lp[i];
    }
  }
  RNN_COPY(pcm_save, pcm, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
  burg_cepstral_analysis(plc_features, x);
  st->enc.pcount = 0;
  if (st->loss_count > 0) {
    LPCNetState copy;
    /* Handle blending. */
    /* Predictor input: only the first 2*NB_BANDS entries (from the Burg analysis)
       and the last entry (set to 1) are non-zero. */
    float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
    RNN_COPY(zeros, plc_features, 2*NB_BANDS);
    zeros[2*NB_BANDS+NB_FEATURES] = 1;
    compute_plc_pred(&st->plc_net, st->features, zeros);
    /* Keep a copy of the synthesis state; it is restored after the reversed-frame synthesis. */
    copy = st->lpcnet;
    lpcnet_synthesize_impl(&st->lpcnet, st->features, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0);
    /* Undo initial DC offset removal so that we can take into account the last 5ms of synthesis. */
    if (st->remove_dc) {
      for (i=0;i<FRAME_SIZE;i++) pcm[i] += lp[i];
      st->dc_mem = mem_bak;
      for (i=0;i<TRAINING_OFFSET;i++) st->syn_dc += DC_CONST*(st->pcm[FRAME_SIZE-TRAINING_OFFSET+i] - st->syn_dc);
      st->dc_mem += st->syn_dc;
      delta += st->syn_dc;
      st->syn_dc = 0;
      /* Redo the DC removal of the input with the updated dc_mem. */
      for (i=0;i<FRAME_SIZE;i++) {
        lp[i] = (int)floor(.5 + st->dc_mem);
        st->dc_mem += DC_CONST*(pcm[i] - st->dc_mem);
        pcm[i] -= lp[i];
      }
      RNN_COPY(pcm_save, pcm, FRAME_SIZE);
    }
    {
      /* Synthesize from the time-reversed received frame with cleared synthesis
         memories, then cross-fade the last TRAINING_OFFSET samples of st->pcm
         with the result using the weight w = .5 - .5*cos(pi*i/TRAINING_OFFSET). */
      short rev[FRAME_SIZE];
      for (i=0;i<FRAME_SIZE;i++) rev[i] = pcm[FRAME_SIZE-i-1];
      clear_state(st);
      lpcnet_synthesize_impl(&st->lpcnet, st->features, rev, FRAME_SIZE, FRAME_SIZE);
      lpcnet_synthesize_tail_impl(&st->lpcnet, rev, TRAINING_OFFSET, 0);
      for (i=0;i<TRAINING_OFFSET;i++) {
        float w;
        w = .5 - .5*cos(M_PI*i/(TRAINING_OFFSET));
        st->pcm[FRAME_SIZE-1-i] = (int)floor(.5 + w*st->pcm[FRAME_SIZE-1-i] + (1-w)*(rev[i]+delta));
      }
    }
    st->lpcnet = copy;
 #if 1
    /* Defer the synthesis update: queue the blended tail followed by the start of
       the new frame; process_queued_update() consumes it on the next call. */
    st->queued_update = 1;
    RNN_COPY(&st->queued_samples[0], &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
    RNN_COPY(&st->queued_samples[TRAINING_OFFSET], pcm, FRAME_SIZE-TRAINING_OFFSET);
 #else
    lpcnet_synthesize_impl(&st->lpcnet, st->features, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, TRAINING_OFFSET);
    lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
 #endif
    /* Run the encoder analysis on the frame held in st->pcm. */
    for (i=0;i<FRAME_SIZE;i++) x[i] = st->pcm[i];
    preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    compute_frame_features(&st->enc, x);
    process_single_frame(&st->enc, NULL);
  }
  /* Encoder analysis of the received frame. */
  for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
  preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
  compute_frame_features(&st->enc, x);
  process_single_frame(&st->enc, NULL);
  if (st->loss_count == 0) {
    /* No preceding loss: feed the predictor the Burg features, the encoder
       features and a trailing 1, then update the synthesis state. */
    RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
    plc_features[2*NB_BANDS+NB_FEATURES] = 1;
    compute_plc_pred(&st->plc_net, st->features, plc_features);
    lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, TRAINING_OFFSET);
    lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
  }
  /* Build the output frame in place and save the input frame in st->pcm. */
  RNN_COPY(&pcm[FRAME_SIZE-TRAINING_OFFSET], pcm, TRAINING_OFFSET);
  RNN_COPY(pcm, &st->pcm[TRAINING_OFFSET], FRAME_SIZE-TRAINING_OFFSET);
  RNN_COPY(st->pcm, pcm_save, FRAME_SIZE);
  st->loss_count = 0;
  if (st->remove_dc) {
    /* Add the DC back: the first TRAINING_OFFSET samples use the values saved in
       dc_buf, the rest use lp[] shifted by TRAINING_OFFSET; then save the end of lp[]. */
    for (i=0;i<TRAINING_OFFSET;i++) pcm[i] += st->dc_buf[i];
    for (;i<FRAME_SIZE;i++) pcm[i] += lp[i-TRAINING_OFFSET];
    for (i=0;i<TRAINING_OFFSET;i++) st->dc_buf[i] = lp[FRAME_SIZE-TRAINING_OFFSET+i];
  }
  return 0;
 }
 /* Non-causal concealment of one lost frame: writes FRAME_SIZE synthesized
    samples to `pcm`, keeps the samples from offset TRAINING_OFFSET onwards at the
    start of st->pcm, and increments loss_count. Always returns 0. */
 static int lpcnet_plc_conceal_non_causal(LPCNetPLCState *st, short *pcm) {
  int i;
  float x[FRAME_SIZE];
  /* All-zero input to the prediction network. */
  float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
  process_queued_update(st);
  st->enc.pcount = 0;
  compute_plc_pred(&st->plc_net, st->features, zeros);
  /* Attenuate feature 0 as the loss gets longer: att_table[] for the first 10
     lost frames, then 2 more per additional frame; the result is floored at -10. */
  if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
  else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
  //if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
  if (st->loss_count == 0) {
    /* First lost frame: the first TRAINING_OFFSET output samples come from the end of st->pcm. */
    RNN_COPY(pcm, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
    lpcnet_synthesize_impl(&st->lpcnet, st->features, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, TRAINING_OFFSET);
    lpcnet_synthesize_tail_impl(&st->lpcnet, &pcm[TRAINING_OFFSET], FRAME_SIZE-TRAINING_OFFSET, 0);
  } else {
    /* Subsequent lost frames: synthesize the whole frame, then run the encoder
       analysis on st->pcm completed with the first TRAINING_OFFSET new samples. */
    lpcnet_synthesize_impl(&st->lpcnet, st->features, pcm, TRAINING_OFFSET, 0);
    lpcnet_synthesize_tail_impl(&st->lpcnet, &pcm[TRAINING_OFFSET], FRAME_SIZE-TRAINING_OFFSET, 0);
    RNN_COPY(&st->pcm[FRAME_SIZE-TRAINING_OFFSET], pcm, TRAINING_OFFSET);
    for (i=0;i<FRAME_SIZE;i++) x[i] = st->pcm[i];
    preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    compute_frame_features(&st->enc, x);
    process_single_frame(&st->enc, NULL);
  }
  RNN_COPY(st->pcm, &pcm[TRAINING_OFFSET], FRAME_SIZE-TRAINING_OFFSET);
  if (st->remove_dc) {
    /* Track the DC of the synthesized samples in syn_dc (skipping the first
       TRAINING_OFFSET samples on the first lost frame), then add DC back to the output. */
    int dc = (int)floor(.5 + st->dc_mem);
    if (st->loss_count == 0) {
        for (i=TRAINING_OFFSET;i<FRAME_SIZE;i++) st->syn_dc += DC_CONST*(pcm[i] - st->syn_dc);
    } else {
        for (i=0;i<FRAME_SIZE;i++) st->syn_dc += DC_CONST*(pcm[i] - st->syn_dc);
    }
    for (i=0;i<TRAINING_OFFSET;i++) pcm[i] += st->dc_buf[i];
    for (;i<FRAME_SIZE;i++) pcm[i] += dc;
    for (i=0;i<TRAINING_OFFSET;i++) st->dc_buf[i] = dc;
  }
  st->loss_count++;
  return 0;
 }
 LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
  /* Dispatch to the implementation matching the mode chosen at init time. */
  return st->non_causal ? lpcnet_plc_update_non_causal(st, pcm)
                        : lpcnet_plc_update_causal(st, pcm);
 }
 LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
  /* Dispatch to the implementation matching the mode chosen at init time. */
  return st->non_causal ? lpcnet_plc_conceal_non_causal(st, pcm)
                        : lpcnet_plc_conceal_causal(st, pcm);
 }
--- a/dnn/lpcnet_private.h
+++ b/dnn/lpcnet_private.h
@ -6,6 +6,7 @@
 #include "freq.h"
 #include "lpcnet.h"
 #include "nnet_data.h"
 #include "plc_data.h"
 #include "kiss99.h"
 #define BITS_PER_CHAR 8
@ -61,6 +62,7 @@ struct LPCNetEncState{
  float features[4][NB_TOTAL_FEATURES];
  float sig_mem[LPC_ORDER];
  int exc_mem;
  float burg_cepstrum[2*NB_BANDS];
 };
 #define PLC_BUF_SIZE (FEATURES_DELAY*FRAME_SIZE + TRAINING_OFFSET)
@ -72,6 +74,18 @@ struct LPCNetPLCState {
  int skip_analysis;
  int blend;
  float features[NB_TOTAL_FEATURES];
  int loss_count;
  PLCNetState plc_net;
  PLCNetState plc_copy;
  int enable_blending;
  int non_causal;
  double dc_mem;
  double syn_dc;
  int remove_dc;
  short dc_buf[TRAINING_OFFSET];
  int queued_update;
  short queued_samples[FRAME_SIZE];
 };
 extern float ceps_codebook1[];
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@ -38,6 +38,7 @@
 #include "tansig_table.h"
 #include "nnet.h"
 #include "nnet_data.h"
 #include "plc_data.h"
 #ifdef NO_OPTIMIZATIONS
 #warning Compiling without any vectorization. This code will be very slow
@ -315,13 +316,15 @@ void compute_gru2(const GRULayer *gru, float *state, const float *input)
      state[i] = h[i];
 }
 #define MAX_RNN_NEURONS_ALL IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS)
 void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input)
 {
   int i;
   int N, M;
   int stride;
-   float zrh[3*MAX_RNN_NEURONS];
+   float zrh[3*MAX_RNN_NEURONS_ALL];
-   float recur[3*MAX_RNN_NEURONS];
+   float recur[3*MAX_RNN_NEURONS_ALL];
   float *z;
   float *r;
   float *h;
@ -330,7 +333,7 @@ void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *stat
   z = zrh;
   r = &zrh[N];
   h = &zrh[2*N];
-   celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS);
+   celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS_ALL);
   celt_assert(input != state);
   celt_assert(gru->reset_after);
   stride = 3*N;
--- a/dnn/training_tf2/dump_plc.py
+++ b/dnn/training_tf2/dump_plc.py
@ -0,0 +1,265 @@
 #!/usr/bin/python3
 '''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2017-2018 Mozilla
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 '''
 import lpcnet_plc
 import sys
 import numpy as np
 from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
 import h5py
 import re
 # Flag for dumping e2e (differentiable lpc) network weights
 flag_e2e = False
 max_rnn_neurons = 1
 max_conv_inputs = 1
 def printVector(f, vector, name, dtype='float', dotp=False):
    """Write `vector` to `f` as a flattened `static const` C array.

    f      -- writable text file object receiving the C source.
    vector -- numpy array; it is flattened before being written.
    name   -- identifier of the generated C array.
    dtype  -- C element type used in the declaration.
    dotp   -- when true, the 2-D input is first regrouped into blocks
              (rows by 4, columns by 8) and its axes permuted to (2, 0, 3, 1).

    Eight values are written per line.
    """
    if dotp:
        n_rows, n_cols = vector.shape[0], vector.shape[1]
        blocked = vector.reshape((n_rows//4, 4, n_cols//8, 8))
        vector = blocked.transpose((2, 0, 3, 1))
    flat = np.reshape(vector, (-1))
    count = len(flat)
    f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, count))
    last = count - 1
    for pos, value in enumerate(flat):
        f.write('{}'.format(value))
        if pos == last:
            # No separator after the final element.
            break
        f.write(',')
        f.write("\n   " if pos % 8 == 7 else " ")
    f.write('\n};\n\n')
 def printSparseVector(f, A, name, have_diag=True):
    """Write matrix `A` to `f` in block-sparse form and return its quantized copy.

    The matrix is scanned in blocks of 4 rows by 8 columns; only blocks whose
    absolute sum exceeds 1e-10 are written. Two weight arrays are emitted under
    `#ifdef DOT_PROD` / `#else` (quantized and unquantized blocks), followed by
    an index array `<name>_idx` that stores, for each group of 8 columns, the
    number of kept blocks and then the starting row of each one.

    When `have_diag` is true, the diagonals of the three N-column slices of `A`
    are written separately as `<name>_diag` and zeroed out of `A`.
    NOTE: this modifies the caller's `A` in place.

    Returns the quantized matrix: round(A*128) clamped to [-128, 127], as int.
    """
    n_rows = A.shape[0]
    n_cols = A.shape[1]
    if have_diag:
        gates = (slice(None, n_rows), slice(n_rows, 2*n_rows), slice(2*n_rows, None))
        # Gather all three diagonals (concatenate copies them) before any of
        # them is zeroed out below.
        diag = np.concatenate([np.diag(A[:, g]) for g in gates])
        for g in gates:
            A[:, g] = A[:, g] - np.diag(np.diag(A[:, g]))
        printVector(f, diag, name + '_diag')
    scaled = np.round(A*128)
    AQ = np.minimum(127, np.maximum(-128, scaled)).astype('int')
    # Seed both lists with empty arrays so the final dtypes (float / int) are
    # the same even when no block is kept.
    float_blocks = [np.zeros((0,))]
    quant_blocks = [np.zeros((0,), dtype='int')]
    idx_entries = []
    for col in range(n_cols//8):
        col_span = slice(col*8, (col+1)*8)
        kept_rows = []
        for row in range(n_rows//4):
            row_span = slice(row*4, (row+1)*4)
            block = A[row_span, col_span]
            if np.sum(np.abs(block)) > 1e-10:
                kept_rows.append(row*4)
                float_blocks.append(block.reshape((-1,)))
                # Quantized blocks are stored transposed (column-major within the block).
                quant_blocks.append(AQ[row_span, col_span].transpose((1,0)).reshape((-1,)))
        idx_entries.append(len(kept_rows))
        idx_entries.extend(kept_rows)
    W0 = np.concatenate(float_blocks)
    W = np.concatenate(quant_blocks)
    idx = np.array(idx_entries, dtype='int')
    f.write('#ifdef DOT_PROD\n')
    printVector(f, W, name, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, W0, name, dtype='qweight')
    f.write('#endif /*DOT_PROD*/\n')
    printVector(f, idx, name + '_idx', dtype='int')
    return AQ
 def dump_layer_ignore(self, f, hf):
    """Default dump method: report that the layer is skipped and write nothing.

    Returns False, so the layer is not recorded as having state.
    """
    layer_type = self.__class__.__name__
    message = "ignoring layer " + self.name + " of type " + layer_type
    print(message)
    return False
 Layer.dump_layer = dump_layer_ignore
 def dump_sparse_gru(self, f, hf):
    """Dump a GRU layer as a `SparseGRULayer` with sparse recurrent weights.

    Writes the arrays and the struct definition to `f`, and the size macros
    and extern declaration to `hf`. The generated name is 'sparse_' followed
    by the layer name. Updates the module-level `max_rnn_neurons`.
    Returns True.
    """
    global max_rnn_neurons
    name = 'sparse_' + self.name
    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
    layer_weights = self.get_weights()
    quantized = printSparseVector(f, layer_weights[1], name + '_recurrent_weights')
    bias = layer_weights[-1]
    printVector(f, bias, name + '_bias')
    # Row 1 of the bias is corrected by the column sums of the quantized weights.
    subias = bias.copy()
    subias[1,:] = subias[1,:] - np.sum(quantized*(1./128),axis=0)
    printVector(f, subias, name + '_subias')
    activation = self.activation.__name__.upper() if hasattr(self, 'activation') else 'TANH'
    reset_after = 0 if (hasattr(self, 'reset_after') and not self.reset_after) else 1
    neurons = layer_weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    struct_args = [name]*6 + [neurons, activation, reset_after]
    f.write('const SparseGRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_recurrent_weights_diag,\n   {}_recurrent_weights,\n   {}_recurrent_weights_idx,\n   {}, ACTIVATION_{}, {}\n}};\n\n'
            .format(*struct_args))
    macro_prefix = name.upper()
    hf.write('#define {}_OUT_SIZE {}\n'.format(macro_prefix, neurons))
    hf.write('#define {}_STATE_SIZE {}\n'.format(macro_prefix, neurons))
    hf.write('extern const SparseGRULayer {};\n\n'.format(name))
    return True
 def dump_gru_layer(self, f, hf):
    """Dump a GRU layer as a `GRULayer` struct.

    The input weights are written in block-sparse form (without a separate
    diagonal); the recurrent weights are written quantized under
    `#ifdef DOT_PROD` and unquantized otherwise. Size macros and the extern
    declaration go to `hf`. Updates the module-level `max_rnn_neurons`.
    Returns True.
    """
    global max_rnn_neurons
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    layer_weights = self.get_weights()
    q_input = printSparseVector(f, layer_weights[0], name + '_weights', have_diag=False)
    f.write('#ifdef DOT_PROD\n')
    q_recurrent = np.clip(np.round(128.*layer_weights[1]).astype('int'), -128, 127)
    printVector(f, q_recurrent, name + '_recurrent_weights', dotp=True, dtype='qweight')
    f.write('#else /*DOT_PROD*/\n')
    printVector(f, layer_weights[1], name + '_recurrent_weights')
    f.write('#endif /*DOT_PROD*/\n')
    bias = layer_weights[-1]
    printVector(f, bias, name + '_bias')
    # Both bias rows are corrected by the column sums of the matching quantized weights.
    subias = bias.copy()
    subias[0,:] = subias[0,:] - np.sum(q_input*(1./128.),axis=0)
    subias[1,:] = subias[1,:] - np.sum(q_recurrent*(1./128.),axis=0)
    printVector(f, subias, name + '_subias')
    activation = self.activation.__name__.upper() if hasattr(self, 'activation') else 'TANH'
    reset_after = 0 if (hasattr(self, 'reset_after') and not self.reset_after) else 1
    nb_inputs = layer_weights[0].shape[0]
    neurons = layer_weights[0].shape[1]//3
    max_rnn_neurons = max(max_rnn_neurons, neurons)
    struct_args = [name]*6 + [nb_inputs, neurons, activation, reset_after]
    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {}_weights_idx,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
            .format(*struct_args))
    macro_prefix = name.upper()
    hf.write('#define {}_OUT_SIZE {}\n'.format(macro_prefix, neurons))
    hf.write('#define {}_STATE_SIZE {}\n'.format(macro_prefix, neurons))
    hf.write('extern const GRULayer {};\n\n'.format(name))
    return True
 GRU.dump_layer = dump_gru_layer
 def dump_gru_layer_dummy(self, f, hf):
    """Write only the OUT_SIZE / STATE_SIZE macros of a GRU layer to `hf`.

    Nothing is written to `f`. Returns True.
    """
    macro_prefix = self.name.upper()
    neurons = self.get_weights()[0].shape[1]//3
    for suffix in ('OUT', 'STATE'):
        hf.write('#define {}_{}_SIZE {}\n'.format(macro_prefix, suffix, neurons))
    return True
 #GRU.dump_layer = dump_gru_layer_dummy
 def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
    """Write a dense layer's weights, bias and `DenseLayer` struct.

    Arrays and the struct go to `f`; the OUT_SIZE macro and the extern
    declaration go to `hf`. `activation` is the upper-case suffix appended
    to `ACTIVATION_`.
    """
    nb_inputs, nb_outputs = weights.shape[0], weights.shape[1]
    printVector(f, weights, name + '_weights')
    printVector(f, bias, name + '_bias')
    f.write('const DenseLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(name, name, name, nb_inputs, nb_outputs, activation))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), nb_outputs))
    hf.write('extern const DenseLayer {};\n\n'.format(name))
 def dump_dense_layer(self, f, hf):
    """Dump a Dense layer through dump_dense_layer_impl().

    Returns False, so the layer is not added to the list of layers that get
    a state buffer.
    """
    layer_name = self.name
    print("printing layer " + layer_name + " of type " + self.__class__.__name__)
    kernel_and_bias = self.get_weights()
    activation_name = self.activation.__name__.upper()
    dump_dense_layer_impl(layer_name, kernel_and_bias[0], kernel_and_bias[1], activation_name, f, hf)
    return False
 Dense.dump_layer = dump_dense_layer
 def dump_conv1d_layer(self, f, hf):
    """Dump a Conv1D layer as a `Conv1DLayer` struct.

    Arrays and the struct go to `f`; the OUT_SIZE, STATE_SIZE and DELAY
    macros and the extern declaration go to `hf`. Updates the module-level
    `max_conv_inputs`. Returns True.
    """
    global max_conv_inputs
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    layer_weights = self.get_weights()
    printVector(f, layer_weights[0], name + '_weights')
    printVector(f, layer_weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    kernel_shape = layer_weights[0].shape
    dim0, dim1, dim2 = kernel_shape[0], kernel_shape[1], kernel_shape[2]
    max_conv_inputs = max(max_conv_inputs, dim1*dim0)
    f.write('const Conv1DLayer {} = {{\n   {}_bias,\n   {}_weights,\n   {}, {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(name, name, name, dim1, dim0, dim2, activation))
    macro_prefix = name.upper()
    hf.write('#define {}_OUT_SIZE {}\n'.format(macro_prefix, dim2))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(macro_prefix, dim1, (dim0-1)))
    hf.write('#define {}_DELAY {}\n'.format(macro_prefix, (dim0-1)//2))
    hf.write('extern const Conv1DLayer {};\n\n'.format(name))
    return True
 Conv1D.dump_layer = dump_conv1d_layer
 # Command line: dump_plc.py <weights.h5> [<cfile> <hfile>]
 # The two output paths are optional but must be given together; reject the
 # invocations that would otherwise fail with an IndexError.
 if len(sys.argv) < 2 or len(sys.argv) == 3:
    sys.exit('usage: dump_plc.py <weights.h5> [<cfile> <hfile>]')
 filename = sys.argv[1]
 # Recover the layer sizes from the saved weights so the model can be rebuilt
 # with matching dimensions before loading them.
 with h5py.File(filename, "r") as f:
    units = min(f['model_weights']['plc_gru1']['plc_gru1']['recurrent_kernel:0'].shape)
    units2 = min(f['model_weights']['plc_gru2']['plc_gru2']['recurrent_kernel:0'].shape)
    cond_size = f['model_weights']['plc_dense1']['plc_dense1']['kernel:0'].shape[1]
 model = lpcnet_plc.new_lpcnet_plc_model(rnn_units=units, cond_size=cond_size)
 model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
 #model.summary()
 model.load_weights(filename, by_name=True)
 if len(sys.argv) > 3:
    cfile = sys.argv[2]
    hfile = sys.argv[3]
 else:
    cfile = 'plc_data.c'
    hfile = 'plc_data.h'
 # Both outputs are closed even if dumping a layer raises.
 with open(cfile, 'w') as f, open(hfile, 'w') as hf:
    f.write('/*This file is automatically generated from a Keras model*/\n')
    f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
    f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))
    hf.write('/*This file is automatically generated from a Keras model*/\n\n')
    hf.write('#ifndef PLC_DATA_H\n#define PLC_DATA_H\n\n#include "nnet.h"\n\n')
    # Layers whose dump method returns True get a state buffer in PLCNetState.
    layer_list = []
    for layer in model.layers:
        if layer.dump_layer(f, hf):
            layer_list.append(layer.name)
    #dump_sparse_gru(model.get_layer('gru_a'), f, hf)
    hf.write('#define PLC_MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
    #hf.write('#define PLC_MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
    hf.write('typedef struct {\n')
    for name in layer_list:
        hf.write('  float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
    hf.write('} PLCNetState;\n')
    hf.write('\n\n#endif\n')
--- a/dnn/training_tf2/lpcnet_plc.py
+++ b/dnn/training_tf2/lpcnet_plc.py
@ -0,0 +1,101 @@
 #!/usr/bin/python3
 '''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2018-2019 Mozilla
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 '''
 import math
 import tensorflow as tf
 from tensorflow.keras.models import Model
 from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
 from tensorflow.compat.v1.keras.layers import CuDNNGRU
 from tensorflow.keras import backend as K
 from tensorflow.keras.constraints import Constraint
 from tensorflow.keras.initializers import Initializer
 from tensorflow.keras.callbacks import Callback
 import numpy as np
 def quant_regularizer(x):
    """Regularization term computed from how far 128*x is from its rounded value.

    Returns 0.01 times the mean of the fourth root of
    1.0001 - cos(2*pi*(128*x - round(128*x))).
    """
    Q = 128
    rounding_error = Q*x - tf.round(Q*x)
    ripple = 1.0001 - tf.math.cos(2*3.1415926535897931*rounding_error)
    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(ripple)))
 class WeightClip(Constraint):
    '''Weight constraint that scales down pairs of adjacent weights.

    Columns are taken in adjacent pairs (0,1), (2,3), ...; a pair whose
    summed absolute value exceeds `c` is scaled so that the sum equals `c`,
    and other pairs are left unchanged.
    '''
    def __init__(self, c=2):
        self.c = c
    def __call__(self, p):
        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
        # saturation when implementing dot products with SSSE3 or AVX2.
        pair_abs_sum = tf.abs(p[:, 1::2]) + tf.abs(p[:, 0::2])
        # Repeat each pair's sum so it lines up with both of the pair's columns.
        per_weight_sum = tf.repeat(pair_abs_sum, 2, axis=1)
        return self.c*p/tf.maximum(self.c, per_weight_sum)
    def get_config(self):
        config = {'name': self.__class__.__name__}
        config['c'] = self.c
        return config
 constraint = WeightClip(0.992)
 def new_lpcnet_plc_model(rnn_units=256, nb_used_features=20, nb_burg_features=36, batch_size=128, training=False, adaptation=False, quantize=False, cond_size=128):
    """Build the PLC prediction model.

    The model takes two inputs, a feature sequence with
    nb_used_features+nb_burg_features values per step and a one-value-per-step
    `lost` sequence. They are concatenated and passed through a tanh dense
    layer ('plc_dense1'), two stateful GRUs ('plc_gru1', 'plc_gru2') with
    Gaussian noise in between, and a linear dense layer ('plc_out') producing
    nb_used_features values per step.

    rnn_units  -- number of units in each GRU.
    batch_size -- fixed batch size of both inputs.
    training   -- use CuDNNGRU layers instead of GRU.
    adaptation -- accepted but not used by this function.
    quantize   -- add quant_regularizer to the GRU kernels.
    cond_size  -- width of 'plc_dense1'.

    Returns the Keras Model, with rnn_units, cond_size, nb_used_features and
    nb_burg_features attached as attributes.
    """
    feat = Input(shape=(None, nb_used_features+nb_burg_features), batch_size=batch_size)
    lost = Input(shape=(None, 1), batch_size=batch_size)
    fdense1 = Dense(cond_size, activation='tanh', name='plc_dense1')
    cfeat = Concatenate()([feat, lost])
    cfeat = fdense1(cfeat)
    #cfeat = Conv1D(cond_size, 3, padding='causal', activation='tanh', name='plc_conv1')(cfeat)
    quant = quant_regularizer if quantize else None
    # Keyword arguments shared by both recurrent layers in either mode.
    gru_kwargs = dict(return_sequences=True, return_state=True, stateful=True,
                      kernel_constraint=constraint, recurrent_constraint=constraint,
                      kernel_regularizer=quant, recurrent_regularizer=quant)
    if training:
        rnn = CuDNNGRU(rnn_units, name='plc_gru1', **gru_kwargs)
        rnn2 = CuDNNGRU(rnn_units, name='plc_gru2', **gru_kwargs)
    else:
        # reset_after is a boolean option; pass True rather than the string 'true'.
        rnn = GRU(rnn_units, recurrent_activation="sigmoid", reset_after=True, name='plc_gru1', **gru_kwargs)
        rnn2 = GRU(rnn_units, recurrent_activation="sigmoid", reset_after=True, name='plc_gru2', **gru_kwargs)
    gru_out1, _ = rnn(cfeat)
    gru_out1 = GaussianNoise(.005)(gru_out1)
    gru_out2, _ = rnn2(gru_out1)
    out_dense = Dense(nb_used_features, activation='linear', name='plc_out')
    plc_out = out_dense(gru_out2)
    model = Model([feat, lost], plc_out)
    model.rnn_units = rnn_units
    model.cond_size = cond_size
    model.nb_used_features = nb_used_features
    model.nb_burg_features = nb_burg_features
    return model
--- a/dnn/training_tf2/plc_loader.py
+++ b/dnn/training_tf2/plc_loader.py
@ -0,0 +1,67 @@
 #!/usr/bin/python3
 '''Copyright (c) 2021-2022 Amazon
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 '''
 import numpy as np
 from tensorflow.keras.utils import Sequence
 class PLCLoader(Sequence):
    '''Batch generator pairing feature sequences with packet-loss masks.

    Every batch multiplies a set of feature sequences by randomly chosen
    segments of the loss trace, so a trace value of 0 zeroes the frame.
    NOTE(review): this implies 1 means "received" and 0 means "lost" --
    confirm against the tool that generates the trace.
    '''
    def __init__(self, features, lost, nb_burg_features, batch_size):
        '''Store the data and draw the random ordering for the first epoch.

        features: array indexed as (sequence, frame, feature); the first
            nb_burg_features entries of the last axis are handled as the
            Burg features.
        lost: loss trace, converted to float.
        nb_burg_features: number of leading Burg feature columns.
        batch_size: number of sequences per batch.
        '''
        seq_len = features.shape[1]
        self.batch_size = batch_size
        self.nb_batches = features.shape[0]//batch_size
        # Only keep the sequences that fill complete batches
        self.features = features[:self.nb_batches*batch_size, :, :]
        trace = lost.astype('float')
        # Keep one sequence-length segment fewer than the trace holds
        nb_segments = len(trace)//seq_len - 1
        self.lost = trace[:nb_segments*seq_len]
        self.nb_burg_features = nb_burg_features
        self.on_epoch_end()
    def on_epoch_end(self):
        '''Reshuffle the sequence order and re-draw the loss segments.'''
        nb_sequences = self.nb_batches*self.batch_size
        seq_len = self.features.shape[1]
        self.indices = np.arange(nb_sequences)
        np.random.shuffle(self.indices)
        # A random start moves the segment boundaries from one epoch to the next
        offset = np.random.randint(0, high=seq_len)
        shifted = self.lost[offset:offset-seq_len]
        self.lost_offset = np.reshape(shifted, (-1, seq_len))
        # One randomly picked loss segment per feature sequence
        self.lost_indices = np.random.randint(0, high=self.lost_offset.shape[0], size=nb_sequences)
    def __getitem__(self, index):
        '''Return (inputs, outputs) for batch number index.

        inputs is [masked features, loss mask]; outputs holds a single
        array made of the non-Burg features followed by one extra column
        equal to 1 - (mask of this frame * mask of the previous frame).
        '''
        batch = slice(index*self.batch_size, (index+1)*self.batch_size)
        nb_burg = self.nb_burg_features
        features = self.features[self.indices[batch], :, :]
        lost = self.lost_offset[self.lost_indices[batch], :]
        # Trailing axis of size 1 so that the mask broadcasts over the features
        lost = np.reshape(lost, (features.shape[0], features.shape[1], 1))
        in_features = features*lost
        #For the first frame after a loss, we don't have valid features, but the Burg estimate is valid.
        in_features[:,1:,nb_burg:] *= lost[:,:-1,:]
        # A frame only counts as fully valid when the previous frame was valid too
        out_lost = np.copy(lost)
        out_lost[:,1:,:] *= lost[:,:-1,:]
        out_features = np.concatenate([features[:,:,nb_burg:], 1.-out_lost], axis=-1)
        return ([in_features*lost, lost], [out_features])
    def __len__(self):
        '''Number of batches per epoch.'''
        return self.nb_batches
--- a/dnn/training_tf2/test_plc.py
+++ b/dnn/training_tf2/test_plc.py
@ -0,0 +1,92 @@
 #!/usr/bin/python3
 '''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2018-2019 Mozilla
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 '''
 # Test a PLC model: run it over a feature file and write the concealed features
 import argparse
 from plc_loader import PLCLoader
 parser = argparse.ArgumentParser(description='Test a PLC model')
 parser.add_argument('weights', metavar='<weights file>', help='weights file (.h5)')
 # The files are handled with np.loadtxt/np.savetxt, i.e. as text rather than raw float32
 parser.add_argument('features', metavar='<features file>', help='text features file (one frame per line, last column is the loss mask)')
 parser.add_argument('output', metavar='<output>', help='reconstructed features file (text)')
 parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
 parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
 parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')
 args = parser.parse_args()
 # Imported after argument parsing; the model module is selected by --model
 import importlib
 lpcnet = importlib.import_module(args.model)
 import sys
 import numpy as np
 from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
 import tensorflow.keras.backend as K
 import h5py
 import tensorflow as tf
 #gpus = tf.config.experimental.list_physical_devices('GPU')
 #if gpus:
 #  try:
 #    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
 #  except RuntimeError as e:
 #    print(e)
 # Batch size 1: the whole file is processed as a single sequence
 model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=1, training=False, quantize=False, cond_size=args.cond_size)
 model.compile()
 feature_file = args.features
 nb_used_features = model.nb_used_features
 # One row per frame; the last column is the loss mask (0 zeroes the frame below)
 features = np.loadtxt(feature_file)
 print(features.shape)
 sequence_size = features.shape[0]
 lost = np.reshape(features[:,-1:], (1, sequence_size, 1))
 # NOTE(review): the model input is nb_used_features+nb_burg_features wide, but only
 # nb_used_features columns are fed here -- confirm this script is only used with a
 # model built without Burg features, or extend it to supply them.
 features = features[:,:nb_used_features]
 features = np.reshape(features, (1, sequence_size, nb_used_features))
 model.load_weights(args.weights)
 # Blank out the frames flagged by the mask before they reach the model
 features = features*lost
 out = model.predict([features, lost])
 # Keep the unmasked frames as they are and use the prediction for the masked ones
 out = features + (1-lost)*out
 np.savetxt(args.output, out[0,:,:])
--- a/dnn/training_tf2/train_plc.py
+++ b/dnn/training_tf2/train_plc.py
@ -0,0 +1,197 @@
 #!/usr/bin/python3
 '''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2018-2019 Mozilla
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 '''
 # Train a PLC model
 import argparse
 from plc_loader import PLCLoader
 parser = argparse.ArgumentParser(description='Train a PLC model')
 # Positional arguments: training data and output name
 parser.add_argument('features', metavar='<features file>',
                     help='binary features file (float32)')
 parser.add_argument('lost_file', metavar='<packet loss file>',
                     help='packet loss traces (int8)')
 parser.add_argument('output', metavar='<output>',
                     help='trained model file (.h5)')
 parser.add_argument('--model', metavar='<model>', default='lpcnet_plc',
                     help='PLC model python definition (without .py)')
 # Starting from existing weights: either quantize them or keep training them, never both
 init_group = parser.add_mutually_exclusive_group()
 init_group.add_argument('--quantize', metavar='<input weights>',
                         help='quantize model')
 init_group.add_argument('--retrain', metavar='<input weights>',
                         help='continue training model')
 # Model size
 parser.add_argument('--gru-size', metavar='<units>', default=256, type=int,
                     help='number of units in GRU (default 256)')
 parser.add_argument('--cond-size', metavar='<units>', default=128, type=int,
                     help='number of units in conditioning network (default 128)')
 # Training schedule
 parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int,
                     help='number of epochs to train for (default 120)')
 parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int,
                     help='batch size to use (default 128)')
 parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int,
                     help='sequence length to use (default 1000)')
 parser.add_argument('--lr', metavar='<learning rate>', type=float,
                     help='learning rate')
 parser.add_argument('--decay', metavar='<decay>', type=float,
                     help='learning rate decay')
 # Loss weighting
 parser.add_argument('--band-loss', metavar='<weight>', default=1.0, type=float,
                     help='weight of band loss (default 1.0)')
 parser.add_argument('--loss-bias', metavar='<bias>', default=0.0, type=float,
                     help='loss bias towards low energy (default 0.0)')
 parser.add_argument('--logdir', metavar='<log dir>',
                     help='directory for tensorboard log files')
 args = parser.parse_args()
 # Imported after argument parsing; the model module is selected by --model
 import importlib
 lpcnet = importlib.import_module(args.model)
 import sys
 import numpy as np
 from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
 import tensorflow.keras.backend as K
 import h5py
 import tensorflow as tf
 #gpus = tf.config.experimental.list_physical_devices('GPU')
 #if gpus:
 #  try:
 #    tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
 #  except RuntimeError as e:
 #    print(e)
 nb_epochs = args.epochs
 # Try reducing batch_size if you run out of memory on your GPU
 batch_size = args.batch_size
 quantize = args.quantize is not None
 retrain = args.retrain is not None
 # Quantization starts from a smaller learning rate with no decay
 lr, decay = (0.00003, 0) if quantize else (0.001, 2.5e-5)
 # Values given on the command line override the defaults above
 if args.lr is not None:
    lr = args.lr
 if args.decay is not None:
    decay = args.decay
 # input_model is only defined when starting from existing weights
 if quantize:
    input_model = args.quantize
 if retrain:
    input_model = args.retrain
 def plc_loss(alpha=1.0, bias=0.):
    '''Build the training loss.

    alpha scales the term computed on the inverse DCT of the error and
    bias scales an extra penalty on positive values of that transformed
    error. The returned function expects y_true to carry a mask in its
    last channel; the error is multiplied by that mask before any term
    is computed, and every mean runs over all elements.
    '''
    def loss(y_true,y_pred):
        weight = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        err = (y_pred - target)*weight
        # All but the last two features go through an inverse DCT
        # (presumably cepstrum -> band energies, TODO confirm)
        band_err = tf.signal.idct(err[:,:,:-2], norm='ortho')
        # Ramp clipped to [0, 1] driven by the last target feature
        bias_mask = K.minimum(1., K.maximum(0., 4*target[:,:,-1:]))
        # Feature 18 gets two extra clipped penalties (presumably the pitch, cf. pitch_loss)
        abs_err_18 = K.abs(err[:,:,18:19])
        overall_term = K.mean(K.abs(err))
        # Penalises negative errors on the last feature
        last_feature_term = 0.1*K.mean(K.maximum(0., -err[:,:,-1:]))
        band_term = alpha*K.mean(K.abs(band_err) + bias*bias_mask*K.maximum(0., band_err))
        wide_clip_term = K.mean(K.minimum(abs_err_18,1.))
        narrow_clip_term = 8*K.mean(K.minimum(abs_err_18,.4))
        return overall_term + last_feature_term + band_term + wide_clip_term + narrow_clip_term
    return loss
 def plc_l1_loss():
    '''Return a metric giving the mean absolute masked error over all features.'''
    def L1_loss(y_true,y_pred):
        # The last channel of y_true is the mask, everything before it is the target
        weight = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        masked_err = (y_pred - target)*weight
        return K.mean(K.abs(masked_err))
    return L1_loss
 def plc_ceps_loss():
    '''Return a metric giving the mean absolute masked error on all but the last two features.'''
    def ceps_loss(y_true,y_pred):
        # The last channel of y_true is the mask, everything before it is the target
        weight = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        masked_err = (y_pred - target)*weight
        # The last two features are left out (presumably the non-cepstral ones, TODO confirm)
        return K.mean(K.abs(masked_err[:,:,:-2]))
    return ceps_loss
 def plc_band_loss():
    '''Return a metric giving the mean absolute masked error after an inverse DCT.'''
    def L1_band_loss(y_true,y_pred):
        # The last channel of y_true is the mask, everything before it is the target
        weight = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        masked_err = (y_pred - target)*weight
        # Orthonormal inverse DCT of all but the last two features
        band_err = tf.signal.idct(masked_err[:,:,:-2], norm='ortho')
        return K.mean(K.abs(band_err))
    return L1_band_loss
 def plc_pitch_loss():
    '''Return a metric giving the mean absolute masked error on feature 18, clipped at 0.4.'''
    def pitch_loss(y_true,y_pred):
        # The last channel of y_true is the mask, everything before it is the target
        weight = y_true[:,:,-1:]
        target = y_true[:,:,:-1]
        masked_err = (y_pred - target)*weight
        # Clipping bounds the contribution of large errors on this feature
        clipped = K.minimum(K.abs(masked_err[:,:,18:19]),.4)
        return K.mean(clipped)
    return pitch_loss
 opt = Adam(lr, decay=decay, beta_2=0.99)
 # Build and compile the model under the distribution strategy's scope
 strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
 with strategy.scope():
    model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=batch_size, training=True, quantize=quantize, cond_size=args.cond_size)
    model.compile(optimizer=opt, loss=plc_loss(alpha=args.band_loss, bias=args.loss_bias), metrics=[plc_l1_loss(), plc_ceps_loss(), plc_band_loss(), plc_pitch_loss()])
    model.summary()
 lpc_order = 16
 feature_file = args.features
 # Number of float32 values stored per frame in the features file
 nb_features = model.nb_used_features + lpc_order + model.nb_burg_features
 nb_used_features = model.nb_used_features
 nb_burg_features = model.nb_burg_features
 sequence_size = args.seq_length
 # Map the features file without reading it all into memory
 features = np.memmap(feature_file, dtype='float32', mode='r')
 # Keep whole sequences only, and a multiple of batch_size of them
 nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
 features = features[:nb_sequences*sequence_size*nb_features]
 features = np.reshape(features, (nb_sequences, sequence_size, nb_features))
 # Drop the trailing lpc_order values of every frame (presumably the LPC coefficients)
 features = features[:, :, :nb_used_features+model.nb_burg_features]
 lost = np.memmap(args.lost_file, dtype='int8', mode='r')
 # dump models to disk as we go
 checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.gru_size, '{epoch:02d}'))
 # input_model is args.quantize or args.retrain, so a single load covers both cases
 if quantize or retrain:
    #Adapting from an existing model
    model.load_weights(input_model)
 model.save_weights('{}_{}_initial.h5'.format(args.output, args.gru_size))
 loader = PLCLoader(features, lost, nb_burg_features, batch_size)
 callbacks = [checkpoint]
 if args.logdir is not None:
    logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.gru_size)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
    callbacks.append(tensorboard_callback)
 model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)