Mirror of https://github.com/xiph/opus.git

Commit 60450472a6: Merge branch 'plc_challenge' into master

20 changed files with 1501 additions and 48 deletions
dnn/Makefile.am

@@ -8,6 +8,7 @@ include_HEADERS = include/lpcnet.h
lib_LTLIBRARIES = liblpcnet.la
noinst_HEADERS = arch.h \
burg.h \
common.h \
freq.h \
_kiss_fft_guts.h \

@@ -16,6 +17,7 @@ noinst_HEADERS = arch.h \
lpcnet_private.h \
opus_types.h \
nnet_data.h \
plc_data.h \
nnet.h \
pitch.h \
tansig_table.h \

@@ -24,6 +26,7 @@ noinst_HEADERS = arch.h \
vec_neon.h

liblpcnet_la_SOURCES = \
burg.c \
common.c \
kiss99.c \
lpcnet.c \

@@ -31,6 +34,7 @@ liblpcnet_la_SOURCES = \
lpcnet_enc.c \
nnet.c \
nnet_data.c \
plc_data.c \
ceps_codebooks.c \
pitch.c \
freq.c \

@@ -52,7 +56,7 @@ lpcnet_demo_LDADD = liblpcnet.la
#dump_data_SOURCES = dump_data.c
#dump_data_LDADD = $(DUMP_OBJ) $(LIBM)

dump_data_SOURCES = common.c dump_data.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c ceps_codebooks.c
dump_data_SOURCES = common.c dump_data.c burg.c freq.c kiss_fft.c pitch.c lpcnet_dec.c lpcnet_enc.c ceps_codebooks.c
dump_data_LDADD = $(LIBM)
dump_data_CFLAGS = $(AM_CFLAGS)
dnn/README.md

@@ -3,8 +3,14 @@
Low complexity implementation of the WaveRNN-based LPCNet algorithm, as described in:

- J.-M. Valin, J. Skoglund, [LPCNet: Improving Neural Speech Synthesis Through Linear Prediction](https://jmvalin.ca/papers/lpcnet_icassp2019.pdf), *Proc. International Conference on Acoustics, Speech and Signal Processing (ICASSP)*, arXiv:1810.11846, 2019.
- J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [Neural Speech Synthesis on a Shoestring: Improving the Efficiency of LPCNet](https://jmvalin.ca/papers/improved_lpcnet.pdf), *Proc. ICASSP*, arXiv:2106.04129, 2022.
- K. Subramani, J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [End-to-end LPCNet: A Neural Vocoder With Fully-Differentiable LPC Estimation](https://jmvalin.ca/papers/lpcnet_end2end.pdf), *Proc. INTERSPEECH*, arXiv:2106.04129, 2022.

For coding/PLC applications of LPCNet, see:

- J.-M. Valin, J. Skoglund, [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://jmvalin.ca/papers/lpcnet_codec.pdf), *Proc. INTERSPEECH*, arXiv:1903.12087, 2019.
- J. Skoglund, J.-M. Valin, [Improving Opus Low Bit Rate Quality with Neural Speech Synthesis](https://jmvalin.ca/papers/opusnet.pdf), *Proc. INTERSPEECH*, arXiv:1905.04628, 2020.
- J.-M. Valin, A. Mustafa, C. Montgomery, T.B. Terriberry, M. Klingbeil, P. Smaragdis, A. Krishnaswamy, [Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model](https://jmvalin.ca/papers/lpcnet_plc.pdf), *Proc. INTERSPEECH*, arXiv:2205.05785, 2022.

# Introduction
@@ -59,6 +65,22 @@ Alternatively, you can run the uncompressed analysis/synthesis using -features
instead of -encode and -synthesis instead of -decode.
The same functionality is available in the form of a library. See include/lpcnet.h for the API.

To try packet loss concealment (PLC), you first need a PLC model, which you can get with:
```
./download_model.sh plc-3b1eab4
```
or (for the PLC challenge submission):
```
./download_model.sh plc_challenge
```
PLC can be tested with:
```
./lpcnet_demo -plc_file noncausal_dc error_pattern.txt input.pcm output.pcm
```
where error_pattern.txt is a text file with one entry per 20-ms packet, with 1 meaning "packet lost" and 0 meaning "packet not lost".
noncausal_dc is the non-causal (5-ms look-ahead) mode with special handling for DC offsets. It's also possible to use "noncausal", "causal",
or "causal_dc".
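As an illustration (not part of this commit): the error-pattern file is just a sequence of 0/1 flags, one per 20-ms packet, which lpcnet_demo.c later in this diff reads with fscanf("%d", ...), so whitespace- or newline-separated integers both work. A minimal C sketch that generates such a file is shown below; the file name, packet count, and 10% loss rate are arbitrary choices for the example.

```
/* Sketch: write one 0/1 flag per 20-ms packet, matching the
 * error_pattern.txt format described above. */
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  FILE *f = fopen("error_pattern.txt", "w");
  int n_packets = 500;                /* 500 packets x 20 ms = 10 s of audio */
  int i;
  if (f == NULL) return 1;
  for (i = 0; i < n_packets; i++) {
    int lost = (rand() % 100) < 10;   /* mark roughly 10% of packets as lost */
    fprintf(f, "%d\n", lost);
  }
  fclose(f);
  return 0;
}
```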
# Training a new model

This codebase is also meant for research and it is possible to train new models. These are the steps to do that:
dnn/burg.c (new file, 245 lines)

@@ -0,0 +1,245 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <math.h>
#include <string.h>
#include <assert.h>

#include "burg.h"

#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
#define SILK_MAX_ORDER_LPC 16
#define FIND_LPC_COND_FAC 1e-5f

/* sum of squares of a silk_float array, with result as double */
static double silk_energy_FLP(
    const float *data,
    int dataSize
)
{
    int i;
    double result;

    /* 4x unrolled loop */
    result = 0.0;
    for( i = 0; i < dataSize - 3; i += 4 ) {
        result += data[ i + 0 ] * (double)data[ i + 0 ] +
                  data[ i + 1 ] * (double)data[ i + 1 ] +
                  data[ i + 2 ] * (double)data[ i + 2 ] +
                  data[ i + 3 ] * (double)data[ i + 3 ];
    }

    /* add any remaining products */
    for( ; i < dataSize; i++ ) {
        result += data[ i ] * (double)data[ i ];
    }

    assert( result >= 0.0 );
    return result;
}

/* inner product of two silk_float arrays, with result as double */
static double silk_inner_product_FLP(
    const float *data1,
    const float *data2,
    int dataSize
)
{
    int i;
    double result;

    /* 4x unrolled loop */
    result = 0.0;
    for( i = 0; i < dataSize - 3; i += 4 ) {
        result += data1[ i + 0 ] * (double)data2[ i + 0 ] +
                  data1[ i + 1 ] * (double)data2[ i + 1 ] +
                  data1[ i + 2 ] * (double)data2[ i + 2 ] +
                  data1[ i + 3 ] * (double)data2[ i + 3 ];
    }

    /* add any remaining products */
    for( ; i < dataSize; i++ ) {
        result += data1[ i ] * (double)data2[ i ];
    }

    return result;
}


/* Compute reflection coefficients from input signal */
float silk_burg_analysis(      /* O    returns residual energy                                  */
    float A[],                 /* O    prediction coefficients (length order)                   */
    const float x[],           /* I    input signal, length: nb_subfr*(D+L_sub)                 */
    const float minInvGain,    /* I    minimum inverse prediction gain                          */
    const int subfr_length,    /* I    input signal subframe length (incl. D preceding samples) */
    const int nb_subfr,        /* I    number of subframes stacked in x                         */
    const int D                /* I    order                                                    */
)
{
    int k, n, s, reached_max_gain;
    double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
    const float *x_ptr;
    double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
    double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
    double Af[ SILK_MAX_ORDER_LPC ];

    assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );

    /* Compute autocorrelations, added over subframes */
    C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
    memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
    for( s = 0; s < nb_subfr; s++ ) {
        x_ptr = x + s * subfr_length;
        for( n = 1; n < D + 1; n++ ) {
            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
        }
    }
    memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );

    /* Initialize */
    CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
    invGain = 1.0f;
    reached_max_gain = 0;
    for( n = 0; n < D; n++ ) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            tmp1 = x_ptr[ n ];
            tmp2 = x_ptr[ subfr_length - n - 1 ];
            for( k = 0; k < n; k++ ) {
                C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
                C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
                Atmp = Af[ k ];
                tmp1 += x_ptr[ n - k - 1 ] * Atmp;
                tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
            }
            for( k = 0; k <= n; k++ ) {
                CAf[ k ] -= tmp1 * x_ptr[ n - k ];
                CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
            }
        }
        tmp1 = C_first_row[ n ];
        tmp2 = C_last_row[ n ];
        for( k = 0; k < n; k++ ) {
            Atmp = Af[ k ];
            tmp1 += C_last_row[ n - k - 1 ] * Atmp;
            tmp2 += C_first_row[ n - k - 1 ] * Atmp;
        }
        CAf[ n + 1 ] = tmp1;
        CAb[ n + 1 ] = tmp2;

        /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
        num = CAb[ n + 1 ];
        nrg_b = CAb[ 0 ];
        nrg_f = CAf[ 0 ];
        for( k = 0; k < n; k++ ) {
            Atmp = Af[ k ];
            num += CAb[ n - k ] * Atmp;
            nrg_b += CAb[ k + 1 ] * Atmp;
            nrg_f += CAf[ k + 1 ] * Atmp;
        }
        assert( nrg_f > 0.0 );
        assert( nrg_b > 0.0 );

        /* Calculate the next order reflection (parcor) coefficient */
        rc = -2.0 * num / ( nrg_f + nrg_b );
        assert( rc > -1.0 && rc < 1.0 );

        /* Update inverse prediction gain */
        tmp1 = invGain * ( 1.0 - rc * rc );
        if( tmp1 <= minInvGain ) {
            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
            rc = sqrt( 1.0 - minInvGain / invGain );
            if( num > 0 ) {
                /* Ensure adjusted reflection coefficients has the original sign */
                rc = -rc;
            }
            invGain = minInvGain;
            reached_max_gain = 1;
        } else {
            invGain = tmp1;
        }

        /* Update the AR coefficients */
        for( k = 0; k < (n + 1) >> 1; k++ ) {
            tmp1 = Af[ k ];
            tmp2 = Af[ n - k - 1 ];
            Af[ k ] = tmp1 + rc * tmp2;
            Af[ n - k - 1 ] = tmp2 + rc * tmp1;
        }
        Af[ n ] = rc;

        if( reached_max_gain ) {
            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
            for( k = n + 1; k < D; k++ ) {
                Af[ k ] = 0.0;
            }
            break;
        }

        /* Update C * Af and C * Ab */
        for( k = 0; k <= n + 1; k++ ) {
            tmp1 = CAf[ k ];
            CAf[ k ] += rc * CAb[ n - k + 1 ];
            CAb[ n - k + 1 ] += rc * tmp1;
        }
    }

    if( reached_max_gain ) {
        /* Convert to float */
        for( k = 0; k < D; k++ ) {
            A[ k ] = (float)( -Af[ k ] );
        }
        /* Subtract energy of preceding samples from C0 */
        for( s = 0; s < nb_subfr; s++ ) {
            C0 -= silk_energy_FLP( x + s * subfr_length, D );
        }
        /* Approximate residual energy */
        nrg_f = C0 * invGain;
    } else {
        /* Compute residual energy and store coefficients as float */
        nrg_f = CAf[ 0 ];
        tmp1 = 1.0;
        for( k = 0; k < D; k++ ) {
            Atmp = Af[ k ];
            nrg_f += CAf[ k + 1 ] * Atmp;
            tmp1 += Atmp * Atmp;
            A[ k ] = (float)(-Atmp);
        }
        nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
    }

    /* Return residual energy */
    return (float)nrg_f;
}
dnn/burg.h (new file, 36 lines)

@@ -0,0 +1,36 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/


float silk_burg_analysis(      /* O    returns residual energy                                  */
    float A[],                 /* O    prediction coefficients (length order)                   */
    const float x[],           /* I    input signal, length: nb_subfr*(D+L_sub)                 */
    const float minInvGain,    /* I    minimum inverse prediction gain                          */
    const int subfr_length,    /* I    input signal subframe length (incl. D preceding samples) */
    const int nb_subfr,        /* I    number of subframes stacked in x                         */
    const int D                /* I    order                                                    */
);
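As an illustration (not part of this commit), the sketch below shows how silk_burg_analysis() might be called directly, along the lines of its use in compute_burg_cepstrum() later in this diff. The subframe length, the synthetic input signal, and the printed output are arbitrary choices for the example; only the function signature comes from burg.h above.

```
/* Sketch: 16th-order Burg analysis on one synthetic subframe. */
#include <math.h>
#include <stdio.h>
#include "burg.h"

#define ORDER 16
#define SUBFR_LEN 160   /* one subframe; 160*1 stays under burg.c's MAX_FRAME_SIZE (384) */

int main(void) {
  float x[SUBFR_LEN];
  float lpc[ORDER];
  float res_energy;
  int i;
  /* Decaying sinusoid as a stand-in for a pre-emphasized speech segment. */
  for (i = 0; i < SUBFR_LEN; i++)
    x[i] = (float)(sin(0.3*i) * exp(-0.005*i));
  /* One subframe, minimum inverse prediction gain of 1e-3 (the value freq.c uses). */
  res_energy = silk_burg_analysis(lpc, x, 1e-3f, SUBFR_LEN, 1, ORDER);
  printf("residual energy: %g\n", res_energy);
  for (i = 0; i < ORDER; i++) printf("a[%d] = %f\n", i+1, lpc[i]);
  return 0;
}
```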
dnn/download_model.sh

@@ -9,4 +9,5 @@ if [ ! -f $model ]; then
fi
tar xvf $model
touch src/nnet_data.[ch]
touch src/plc_data.[ch]
mv src/*.[ch] .
dnn/dump_data.c

@@ -138,9 +138,18 @@ int main(int argc, char **argv) {
int encode = 0;
int decode = 0;
int quantize = 0;
int burg = 0;
srand(getpid());
st = lpcnet_encoder_create();
argv0=argv[0];
if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
burg = 1;
training = 1;
}
if (argc == 4 && strcmp(argv[1], "-btest")==0) {
burg = 1;
training = 0;
}
if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
if (argc == 5 && strcmp(argv[1], "-qtrain")==0) {
training = 1;

@@ -236,7 +245,8 @@ int main(int argc, char **argv) {
if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
if (training && ++gain_change_count > 2821) {
float tmp, tmp2;
speech_gain = pow(10., (-20+(rand()%40))/20.);
speech_gain = pow(10., (-30+(rand()%40))/20.);
if (rand()&1) speech_gain = -speech_gain;
if (rand()%20==0) speech_gain *= .01;
if (rand()%100==0) speech_gain = 0;
gain_change_count = 0;

@@ -247,13 +257,18 @@ int main(int argc, char **argv) {
}
biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
for (i=0;i<FRAME_SIZE;i++) {
float g;
float f = (float)i/FRAME_SIZE;
g = f*speech_gain + (1-f)*old_speech_gain;
x[i] *= g;
}
if (burg) {
float ceps[2*NB_BANDS];
burg_cepstral_analysis(ceps, x);
fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
}
preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
/* PCM is delayed by 1/2 frame to make the features centered on the frames. */
for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
dnn/freq.c (75 changed lines)

@@ -37,6 +37,7 @@
#include "freq.h"
#include "pitch.h"
#include "arch.h"
#include "burg.h"
#include <assert.h>

#define SQUARE(x) ((x)*(x))

@@ -58,6 +59,32 @@ typedef struct {
} CommonState;


void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
int i;
float sum[NB_BANDS] = {0};
for (i=0;i<NB_BANDS-1;i++)
{
int j;
int band_size;
band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
for (j=0;j<band_size;j++) {
float tmp;
float frac = (float)j/band_size;
tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
tmp = 1.f/(tmp + 1e-9);
sum[i] += (1-frac)*tmp;
sum[i+1] += frac*tmp;
}
}
sum[0] *= 2;
sum[NB_BANDS-1] *= 2;
for (i=0;i<NB_BANDS;i++)
{
bandE[i] = sum[i];
}
}

float _lpcnet_lpc(
opus_val16 *lpc, /* out: [0...p-1] LPC coefficients */
opus_val16 *rc,

@@ -128,6 +155,54 @@ void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
}
}

void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order) {
int i;
float burg_in[FRAME_SIZE];
float burg_lpc[LPC_ORDER];
float x[WINDOW_SIZE];
float Eburg[NB_BANDS];
float g;
float E;
kiss_fft_cpx LPC[FREQ_SIZE];
float Ly[NB_BANDS];
assert(order <= LPC_ORDER);
assert(len <= FRAME_SIZE);
for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
g /= len - 2*(order-1);
//printf("%g\n", g);
RNN_CLEAR(x, WINDOW_SIZE);
x[0] = 1;
for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
forward_transform(LPC, x);
compute_band_energy_inverse(Eburg, LPC);
for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
float logMax = -2;
float follow = -2;
for (i=0;i<NB_BANDS;i++) {
Ly[i] = log10(1e-2+Eburg[i]);
Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
logMax = MAX16(logMax, Ly[i]);
follow = MAX16(follow-2.5, Ly[i]);
E += Eburg[i];
}
dct(burg_cepstrum, Ly);
burg_cepstrum[0] += - 4;
}

void burg_cepstral_analysis(float *ceps, const float *x) {
int i;
compute_burg_cepstrum(x, &ceps[0 ], FRAME_SIZE/2, LPC_ORDER);
compute_burg_cepstrum(&x[FRAME_SIZE/2], &ceps[NB_BANDS], FRAME_SIZE/2, LPC_ORDER);
for (i=0;i<NB_BANDS;i++) {
float c0, c1;
c0 = ceps[i];
c1 = ceps[NB_BANDS+i];
ceps[i ] = .5*(c0+c1);
ceps[NB_BANDS+i] = (c0-c1);
}
}

void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
int i;
float sum[NB_BANDS] = {0};
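An editorial note (not part of the commit): compute_burg_cepstrum() above fits an order-16 LPC polynomial with Burg's method, applies a mild 0.995 bandwidth expansion, and then measures the band energies of the resulting synthesis filter rather than of the signal itself, before taking a log and a DCT to obtain a cepstrum. Ignoring the fixed scaling, the log floor, and the "follower" smoothing, the quantities computed are roughly

$$
\hat{A}(z) = 1 - \sum_{k=1}^{p} a_k\, 0.995^{\,k}\, z^{-k}, \qquad
E_b \propto g \sum_{j \in \text{band } b} \frac{w_b(j)}{\bigl|\hat{A}(e^{\mathrm{j}\omega_j})\bigr|^2 + \varepsilon}, \qquad
c = \mathrm{DCT}\bigl(\log_{10}(10^{-2} + E)\bigr),
$$

where $g$ is the Burg residual energy, $w_b$ are the same triangular band weights used by compute_band_energy(), and $\varepsilon$ is the 1e-9 regularizer in compute_band_energy_inverse(). burg_cepstral_analysis() then stores the average and the difference of the two half-frame cepstra.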
dnn/freq.h

@@ -47,6 +47,8 @@

void compute_band_energy(float *bandE, const kiss_fft_cpx *X);
void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P);
void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order);
void burg_cepstral_analysis(float *ceps, const float *x);

void apply_window(float *x);
void dct(float *out, const float *in);
dnn/include/lpcnet.h

@@ -176,11 +176,18 @@ LPCNET_EXPORT void lpcnet_destroy(LPCNetState *st);
*/
LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *st, const float *features, short *output, int N);


#define LPCNET_PLC_CAUSAL 0
#define LPCNET_PLC_NONCAUSAL 1
#define LPCNET_PLC_CODEC 2

#define LPCNET_PLC_DC_FILTER 4

LPCNET_EXPORT int lpcnet_plc_get_size(void);

LPCNET_EXPORT void lpcnet_plc_init(LPCNetPLCState *st);
LPCNET_EXPORT int lpcnet_plc_init(LPCNetPLCState *st, int options);

LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create(void);
LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create(int options);

LPCNET_EXPORT void lpcnet_plc_destroy(LPCNetPLCState *st);
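For context (an editorial sketch, not part of the commit): the declarations above replace the zero-argument PLC constructor with one that takes an options word, where the low bits select causal, non-causal, or codec operation and LPCNET_PLC_DC_FILTER can be OR-ed in for DC handling, as the lpcnet_plc.c changes later in this diff show. A minimal caller, assuming 10-ms frames of 160 samples and a placeholder packet_was_lost() supplied by the application, could look like this:

```
/* Sketch: frame loop around the updated PLC API. */
#include <stdio.h>
#include "lpcnet.h"

#define FRAME_SIZE 160  /* assumption for this sketch: 10-ms frames at 16 kHz */

static int packet_was_lost(int frame_index) {
  (void)frame_index;
  return 0;   /* a real application would ask its jitter buffer */
}

int run_plc(FILE *fin, FILE *fout) {
  short pcm[FRAME_SIZE];
  int index = 0;
  /* Non-causal PLC with DC-offset handling, i.e. the demo's "noncausal_dc" mode. */
  LPCNetPLCState *plc = lpcnet_plc_create(LPCNET_PLC_NONCAUSAL | LPCNET_PLC_DC_FILTER);
  if (plc == NULL) return -1;
  while (fread(pcm, sizeof(pcm[0]), FRAME_SIZE, fin) == FRAME_SIZE) {
    if (packet_was_lost(index)) lpcnet_plc_conceal(plc, pcm);  /* fill in the lost frame */
    else lpcnet_plc_update(plc, pcm);                          /* feed (and possibly delay) good audio */
    fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);
    index++;
  }
  lpcnet_plc_destroy(plc);
  return 0;
}
```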
dnn/lpcnet.c

@@ -98,7 +98,6 @@ void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b
compute_conv1d(&feature_conv1, conv1_out, net->feature_conv1_state, in);
if (lpcnet->frame_count < FEATURE_CONV1_DELAY) RNN_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE);
compute_conv1d(&feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
celt_assert(FRAME_INPUT_SIZE == FEATURE_CONV2_OUT_SIZE);
if (lpcnet->frame_count < FEATURES_DELAY) RNN_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE);
_lpcnet_compute_dense(&feature_dense1, dense1_out, conv2_out);
_lpcnet_compute_dense(&feature_dense2, condition, dense1_out);

@@ -196,8 +195,12 @@ void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int
last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
pred_ulaw = lin2ulaw(pred);
exc = run_sample_network(&lpcnet->nnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
if (i < preload) exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);
if (i < preload) {
exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);
pcm = output[i]-PREEMPH*lpcnet->deemph_mem;
} else {
pcm = pred + ulaw2lin(exc);
}
RNN_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
lpcnet->last_sig[0] = pcm;
lpcnet->last_exc = exc;

@@ -205,7 +208,7 @@ void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int
lpcnet->deemph_mem = pcm;
if (pcm<-32767) pcm = -32767;
if (pcm>32767) pcm = 32767;
output[i] = (int)floor(.5 + pcm);
if (i >= preload) output[i] = (int)floor(.5 + pcm);
}
}
dnn/lpcnet_demo.c

@@ -40,30 +40,60 @@
#define MODE_SYNTHESIS 3
#define MODE_PLC 4

int main(int argc, char **argv) {
int mode;
int plc_percent=0;
FILE *fin, *fout;
if (argc != 4 && !(argc == 5 && strcmp(argv[1], "-plc") == 0))
{
void usage(void) {
fprintf(stderr, "usage: lpcnet_demo -encode <input.pcm> <compressed.lpcnet>\n");
fprintf(stderr, "       lpcnet_demo -decode <compressed.lpcnet> <output.pcm>\n");
fprintf(stderr, "       lpcnet_demo -features <input.pcm> <features.f32>\n");
fprintf(stderr, "       lpcnet_demo -synthesis <features.f32> <output.pcm>\n");
fprintf(stderr, "       lpcnet_demo -plc <percent> <input.pcm> <output.pcm>\n");
return 0;
fprintf(stderr, "       lpcnet_demo -plc <plc_options> <percent> <input.pcm> <output.pcm>\n");
fprintf(stderr, "       lpcnet_demo -plc_file <plc_options> <percent> <input.pcm> <output.pcm>\n\n");
fprintf(stderr, "  plc_options:\n");
fprintf(stderr, "       causal: normal (causal) PLC\n");
fprintf(stderr, "       causal_dc: normal (causal) PLC with DC offset compensation\n");
fprintf(stderr, "       noncausal: non-causal PLC\n");
fprintf(stderr, "       noncausal_dc: non-causal PLC with DC offset compensation\n");
exit(1);
}

int main(int argc, char **argv) {
int mode;
int plc_percent=0;
FILE *fin, *fout;
FILE *plc_file = NULL;
const char *plc_options;
int plc_flags=-1;
if (argc < 4) usage();
if (strcmp(argv[1], "-encode") == 0) mode=MODE_ENCODE;
else if (strcmp(argv[1], "-decode") == 0) mode=MODE_DECODE;
else if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
else if (strcmp(argv[1], "-synthesis") == 0) mode=MODE_SYNTHESIS;
else if (strcmp(argv[1], "-plc") == 0) {
mode=MODE_PLC;
plc_percent = atoi(argv[2]);
argv++;
} else {
plc_options = argv[2];
plc_percent = atoi(argv[3]);
argv+=2;
argc-=2;
} else if (strcmp(argv[1], "-plc_file") == 0) {
mode=MODE_PLC;
plc_options = argv[2];
plc_file = fopen(argv[3], "r");
if (!plc_file) {
fprintf(stderr, "Can't open %s\n", argv[3]);
exit(1);
}
argv+=2;
argc-=2;
} else {
usage();
}
if (mode == MODE_PLC) {
if (strcmp(plc_options, "causal")==0) plc_flags = LPCNET_PLC_CAUSAL;
else if (strcmp(plc_options, "causal_dc")==0) plc_flags = LPCNET_PLC_CAUSAL | LPCNET_PLC_DC_FILTER;
else if (strcmp(plc_options, "noncausal")==0) plc_flags = LPCNET_PLC_NONCAUSAL;
else if (strcmp(plc_options, "noncausal_dc")==0) plc_flags = LPCNET_PLC_NONCAUSAL | LPCNET_PLC_DC_FILTER;
else usage();
}
if (argc != 4) usage();
fin = fopen(argv[2], "rb");
if (fin == NULL) {
fprintf(stderr, "Can't open %s\n", argv[2]);

@@ -131,21 +161,31 @@ int main(int argc, char **argv) {
}
lpcnet_destroy(net);
} else if (mode == MODE_PLC) {
short pcm[FRAME_SIZE];
int count=0;
int loss=0;
int skip=0, extra=0;
if ((plc_flags&0x3) == LPCNET_PLC_NONCAUSAL) skip=extra=80;
LPCNetPLCState *net;
net = lpcnet_plc_create();
net = lpcnet_plc_create(plc_flags);
while (1) {
short pcm[FRAME_SIZE];
size_t ret;
ret = fread(pcm, sizeof(pcm[0]), FRAME_SIZE, fin);
if (feof(fin) || ret != FRAME_SIZE) break;
if (count % 2 == 0) loss = rand() < RAND_MAX*(float)plc_percent/100.f;
if (count % 2 == 0) {
if (plc_file != NULL) fscanf(plc_file, "%d", &loss);
else loss = rand() < RAND_MAX*(float)plc_percent/100.f;
}
if (loss) lpcnet_plc_conceal(net, pcm);
else lpcnet_plc_update(net, pcm);
fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);
fwrite(&pcm[skip], sizeof(pcm[0]), FRAME_SIZE-skip, fout);
skip = 0;
count++;
}
if (extra) {
lpcnet_plc_conceal(net, pcm);
fwrite(pcm, sizeof(pcm[0]), extra, fout);
}
lpcnet_plc_destroy(net);
} else {
fprintf(stderr, "unknown action\n");
dnn/lpcnet_enc.c

@@ -28,6 +28,10 @@
#include "config.h"
#endif

#ifdef OPUS_BUILD
#define celt_pitch_xcorr celt_pitch_xcorr_c
#endif

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

@@ -540,7 +544,7 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
ener = (1 + ener0 + celt_inner_prod(&st->exc_buf[i+off], &st->exc_buf[i+off], FRAME_SIZE/2));
st->xc[2+2*st->pcount+sub][i] = 2*xcorr[i] / ener;
}
if (0) {
if (1) {
/* Upsample correlation by 3x and keep the max. */
float interpolated[PITCH_MAX_PERIOD]={0};
/* interp=sinc([-3:3]+1/3).*(.5+.5*cos(pi*[-3:3]/4.5)); interp=interp/sum(interp); */
dnn/lpcnet_plc.c (284 changed lines)

@@ -30,24 +30,48 @@

#include "lpcnet_private.h"
#include "lpcnet.h"
#include "plc_data.h"

LPCNET_EXPORT int lpcnet_plc_get_size() {
return sizeof(LPCNetPLCState);
}

LPCNET_EXPORT void lpcnet_plc_init(LPCNetPLCState *st) {
LPCNET_EXPORT int lpcnet_plc_init(LPCNetPLCState *st, int options) {
if (FEATURES_DELAY != 0) {
fprintf(stderr, "PLC cannot work with non-zero FEATURES_DELAY\n");
fprintf(stderr, "Recompile with a no-lookahead model (see README.md)\n");
exit(1);
}
RNN_CLEAR(st, 1);
lpcnet_init(&st->lpcnet);
lpcnet_encoder_init(&st->enc);
RNN_CLEAR(st->pcm, PLC_BUF_SIZE);
st->pcm_fill = PLC_BUF_SIZE;
st->skip_analysis = 0;
st->blend = 0;
st->loss_count = 0;
st->dc_mem = 0;
st->queued_update = 0;
if ((options&0x3) == LPCNET_PLC_CAUSAL) {
st->enable_blending = 1;
st->non_causal = 0;
} else if ((options&0x3) == LPCNET_PLC_NONCAUSAL) {
st->enable_blending = 1;
st->non_causal = 1;
} else if ((options&0x3) == LPCNET_PLC_CODEC) {
st->enable_blending = 0;
st->non_causal = 0;
} else {
return -1;
}
st->remove_dc = !!(options&LPCNET_PLC_DC_FILTER);
return 0;
}

LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create() {
LPCNET_EXPORT LPCNetPLCState *lpcnet_plc_create(int options) {
LPCNetPLCState *st;
st = malloc(sizeof(*st));
lpcnet_plc_init(st);
st = calloc(sizeof(*st), 1);
lpcnet_plc_init(st, options);
return st;
}

@@ -55,20 +79,73 @@ LPCNET_EXPORT void lpcnet_plc_destroy(LPCNetPLCState *st) {
free(st);
}

LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
static void compute_plc_pred(PLCNetState *net, float *out, const float *in) {
float zeros[3*PLC_MAX_RNN_NEURONS] = {0};
float dense_out[PLC_DENSE1_OUT_SIZE];
_lpcnet_compute_dense(&plc_dense1, dense_out, in);
compute_gruB(&plc_gru1, zeros, net->plc_gru1_state, dense_out);
compute_gruB(&plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);
_lpcnet_compute_dense(&plc_out, out, net->plc_gru2_state);
/* Artificially boost the correlation to make harmonics cleaner. */
out[19] = MIN16(.5f, out[19]+.1f);
}

void clear_state(LPCNetPLCState *st) {
RNN_CLEAR(st->lpcnet.last_sig, LPC_ORDER);
st->lpcnet.last_exc = lin2ulaw(0.f);;
st->lpcnet.deemph_mem = 0;
RNN_CLEAR(st->lpcnet.nnet.gru_a_state, GRU_A_STATE_SIZE);
RNN_CLEAR(st->lpcnet.nnet.gru_b_state, GRU_B_STATE_SIZE);
}

#define DC_CONST 0.003

/* In this causal version of the code, the DNN model implemented by compute_plc_pred()
   needs to generate two feature vectors to conceal the first lost packet.*/

static int lpcnet_plc_update_causal(LPCNetPLCState *st, short *pcm) {
int i;
float x[FRAME_SIZE];
short output[FRAME_SIZE];
float plc_features[2*NB_BANDS+NB_FEATURES+1];
short lp[FRAME_SIZE]={0};
int delta = 0;
if (st->remove_dc) {
st->dc_mem += st->syn_dc;
delta = st->syn_dc;
st->syn_dc = 0;
for (i=0;i<FRAME_SIZE;i++) {
lp[i] = (int)floor(.5 + st->dc_mem);
st->dc_mem += DC_CONST*(pcm[i] - st->dc_mem);
pcm[i] -= lp[i];
}
}
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
burg_cepstral_analysis(plc_features, x);
st->enc.pcount = 0;
if (st->skip_analysis) {
/*fprintf(stderr, "skip update\n");*/
if (st->blend) {
short tmp[FRAME_SIZE-TRAINING_OFFSET];
lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, 0);
float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
RNN_COPY(zeros, plc_features, 2*NB_BANDS);
zeros[2*NB_BANDS+NB_FEATURES] = 1;
st->plc_net = st->plc_copy;
compute_plc_pred(&st->plc_net, st->features, zeros);
if (st->enable_blending) {
LPCNetState copy;
copy = st->lpcnet;
lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], tmp, FRAME_SIZE-TRAINING_OFFSET, 0);
for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) {
float w;
w = .5 - .5*cos(M_PI*i/(FRAME_SIZE-TRAINING_OFFSET));
pcm[i] = (int)floor(.5 + w*pcm[i] + (1-w)*tmp[i]);
pcm[i] = (int)floor(.5 + w*pcm[i] + (1-w)*(tmp[i]-delta));
}
st->lpcnet = copy;
lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
} else {
RNN_COPY(tmp, pcm, FRAME_SIZE-TRAINING_OFFSET);
lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
}
st->blend = 0;
RNN_COPY(st->pcm, &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);

@@ -93,18 +170,28 @@ LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
run_frame_network(&st->lpcnet, gru_a_condition, gru_b_condition, lpc, st->enc.features[0]);
st->skip_analysis--;
} else {
RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
plc_features[2*NB_BANDS+NB_FEATURES] = 1;
compute_plc_pred(&st->plc_net, st->features, plc_features);
for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE+i] = pcm[i];
RNN_COPY(output, &st->pcm[0], FRAME_SIZE);
lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE);

RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
}
RNN_COPY(st->features, st->enc.features[0], NB_TOTAL_FEATURES);
st->loss_count = 0;
if (st->remove_dc) {
for (i=0;i<FRAME_SIZE;i++) {
pcm[i] += lp[i];
}
}
return 0;
}

LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
static const float att_table[10] = {0, 0,  -.2, -.2,  -.4, -.4, -.8, -.8, -1.6, -1.6};
static int lpcnet_plc_conceal_causal(LPCNetPLCState *st, short *pcm) {
int i;
short output[FRAME_SIZE];
float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
st->enc.pcount = 0;
/* If we concealed the previous frame, finish synthesizing the rest of the samples. */
/* FIXME: Copy/predict features. */

@@ -113,16 +200,20 @@ LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
int update_count;
update_count = IMIN(st->pcm_fill, FRAME_SIZE);
RNN_COPY(output, &st->pcm[0], update_count);

compute_plc_pred(&st->plc_net, st->features, zeros);
lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], output, update_count, update_count);
RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE);
st->pcm_fill -= update_count;
st->skip_analysis++;
}
st->plc_copy = st->plc_net;
lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0);
compute_plc_pred(&st->plc_net, st->features, zeros);
if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
//if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));
lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0);
{
int i;
float x[FRAME_SIZE];
/* FIXME: Can we do better? */
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];

@@ -130,6 +221,175 @@ LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
compute_frame_features(&st->enc, x);
process_single_frame(&st->enc, NULL);
}
st->loss_count++;
st->blend = 1;
if (st->remove_dc) {
for (i=0;i<FRAME_SIZE;i++) {
st->syn_dc += DC_CONST*(pcm[i] - st->syn_dc);
pcm[i] += (int)floor(.5 + st->dc_mem);
}
}
return 0;
}

/* In this non-causal version of the code, the DNN model implemented by compute_plc_pred()
   is always called once per frame. We process audio up to the current position minus TRAINING_OFFSET. */

void process_queued_update(LPCNetPLCState *st) {
if (st->queued_update) {
lpcnet_synthesize_impl(&st->lpcnet, st->features, st->queued_samples, FRAME_SIZE, FRAME_SIZE);
st->queued_update=0;
}
}

static int lpcnet_plc_update_non_causal(LPCNetPLCState *st, short *pcm) {
int i;
float x[FRAME_SIZE];
short pcm_save[FRAME_SIZE];
float plc_features[2*NB_BANDS+NB_FEATURES+1];
short lp[FRAME_SIZE]={0};
double mem_bak=0;
int delta = st->syn_dc;
process_queued_update(st);
if (st->remove_dc) {
st->dc_mem += st->syn_dc;
st->syn_dc = 0;
mem_bak = st->dc_mem;
for (i=0;i<FRAME_SIZE;i++) {
lp[i] = (int)floor(.5 + st->dc_mem);
st->dc_mem += DC_CONST*(pcm[i] - st->dc_mem);
pcm[i] -= lp[i];
}
}
RNN_COPY(pcm_save, pcm, FRAME_SIZE);
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
burg_cepstral_analysis(plc_features, x);
st->enc.pcount = 0;
if (st->loss_count > 0) {
LPCNetState copy;
/* Handle blending. */
float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
RNN_COPY(zeros, plc_features, 2*NB_BANDS);
zeros[2*NB_BANDS+NB_FEATURES] = 1;
compute_plc_pred(&st->plc_net, st->features, zeros);
copy = st->lpcnet;
lpcnet_synthesize_impl(&st->lpcnet, st->features, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0);
/* Undo initial DC offset removal so that we can take into account the last 5ms of synthesis. */
if (st->remove_dc) {
for (i=0;i<FRAME_SIZE;i++) pcm[i] += lp[i];
st->dc_mem = mem_bak;
for (i=0;i<TRAINING_OFFSET;i++) st->syn_dc += DC_CONST*(st->pcm[FRAME_SIZE-TRAINING_OFFSET+i] - st->syn_dc);
st->dc_mem += st->syn_dc;
delta += st->syn_dc;
st->syn_dc = 0;
for (i=0;i<FRAME_SIZE;i++) {
lp[i] = (int)floor(.5 + st->dc_mem);
st->dc_mem += DC_CONST*(pcm[i] - st->dc_mem);
pcm[i] -= lp[i];
}
RNN_COPY(pcm_save, pcm, FRAME_SIZE);
}
{
short rev[FRAME_SIZE];
for (i=0;i<FRAME_SIZE;i++) rev[i] = pcm[FRAME_SIZE-i-1];
clear_state(st);
lpcnet_synthesize_impl(&st->lpcnet, st->features, rev, FRAME_SIZE, FRAME_SIZE);
lpcnet_synthesize_tail_impl(&st->lpcnet, rev, TRAINING_OFFSET, 0);
for (i=0;i<TRAINING_OFFSET;i++) {
float w;
w = .5 - .5*cos(M_PI*i/(TRAINING_OFFSET));
st->pcm[FRAME_SIZE-1-i] = (int)floor(.5 + w*st->pcm[FRAME_SIZE-1-i] + (1-w)*(rev[i]+delta));
}

}
st->lpcnet = copy;
#if 1
st->queued_update = 1;
RNN_COPY(&st->queued_samples[0], &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
RNN_COPY(&st->queued_samples[TRAINING_OFFSET], pcm, FRAME_SIZE-TRAINING_OFFSET);
#else
lpcnet_synthesize_impl(&st->lpcnet, st->features, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, TRAINING_OFFSET);
lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
#endif
for (i=0;i<FRAME_SIZE;i++) x[i] = st->pcm[i];
preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
compute_frame_features(&st->enc, x);
process_single_frame(&st->enc, NULL);

}
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
compute_frame_features(&st->enc, x);
process_single_frame(&st->enc, NULL);
if (st->loss_count == 0) {
RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES);
plc_features[2*NB_BANDS+NB_FEATURES] = 1;
compute_plc_pred(&st->plc_net, st->features, plc_features);
lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, TRAINING_OFFSET);
lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);
}
RNN_COPY(&pcm[FRAME_SIZE-TRAINING_OFFSET], pcm, TRAINING_OFFSET);
RNN_COPY(pcm, &st->pcm[TRAINING_OFFSET], FRAME_SIZE-TRAINING_OFFSET);
RNN_COPY(st->pcm, pcm_save, FRAME_SIZE);
st->loss_count = 0;
if (st->remove_dc) {
for (i=0;i<TRAINING_OFFSET;i++) pcm[i] += st->dc_buf[i];
for (;i<FRAME_SIZE;i++) pcm[i] += lp[i-TRAINING_OFFSET];
for (i=0;i<TRAINING_OFFSET;i++) st->dc_buf[i] = lp[FRAME_SIZE-TRAINING_OFFSET+i];
}
return 0;
}

static int lpcnet_plc_conceal_non_causal(LPCNetPLCState *st, short *pcm) {
int i;
float x[FRAME_SIZE];
float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
process_queued_update(st);
st->enc.pcount = 0;

compute_plc_pred(&st->plc_net, st->features, zeros);
if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
//if (st->loss_count > 4) st->features[NB_FEATURES-1] = MAX16(-.5, st->features[NB_FEATURES-1]-.1*(st->loss_count-4));

if (st->loss_count == 0) {
RNN_COPY(pcm, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
lpcnet_synthesize_impl(&st->lpcnet, st->features, &st->pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, TRAINING_OFFSET);
lpcnet_synthesize_tail_impl(&st->lpcnet, &pcm[TRAINING_OFFSET], FRAME_SIZE-TRAINING_OFFSET, 0);
} else {
lpcnet_synthesize_impl(&st->lpcnet, st->features, pcm, TRAINING_OFFSET, 0);
lpcnet_synthesize_tail_impl(&st->lpcnet, &pcm[TRAINING_OFFSET], FRAME_SIZE-TRAINING_OFFSET, 0);

RNN_COPY(&st->pcm[FRAME_SIZE-TRAINING_OFFSET], pcm, TRAINING_OFFSET);
for (i=0;i<FRAME_SIZE;i++) x[i] = st->pcm[i];
preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
compute_frame_features(&st->enc, x);
process_single_frame(&st->enc, NULL);
}
RNN_COPY(st->pcm, &pcm[TRAINING_OFFSET], FRAME_SIZE-TRAINING_OFFSET);

if (st->remove_dc) {
int dc = (int)floor(.5 + st->dc_mem);
if (st->loss_count == 0) {
for (i=TRAINING_OFFSET;i<FRAME_SIZE;i++) st->syn_dc += DC_CONST*(pcm[i] - st->syn_dc);
} else {
for (i=0;i<FRAME_SIZE;i++) st->syn_dc += DC_CONST*(pcm[i] - st->syn_dc);
}
for (i=0;i<TRAINING_OFFSET;i++) pcm[i] += st->dc_buf[i];
for (;i<FRAME_SIZE;i++) pcm[i] += dc;
for (i=0;i<TRAINING_OFFSET;i++) st->dc_buf[i] = dc;
}
st->loss_count++;
return 0;
}


LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
if (st->non_causal) return lpcnet_plc_update_non_causal(st, pcm);
else return lpcnet_plc_update_causal(st, pcm);
}

LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
if (st->non_causal) return lpcnet_plc_conceal_non_causal(st, pcm);
else return lpcnet_plc_conceal_causal(st, pcm);
}
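An editorial aside (not part of the commit): the "_dc" handling above rests on a first-order recursive DC estimate, dc_mem += DC_CONST*(pcm[i] - dc_mem), which is subtracted from the input before analysis/synthesis and added back on output, with a parallel syn_dc tracker following the synthesized signal so the two can be blended without a step in the DC level. With DC_CONST = 0.003 this is a one-pole low-pass whose time constant is roughly 330 samples, i.e. about 20 ms at 16 kHz. Isolated from the PLC state machine, the estimator looks like the sketch below; the names here are illustrative.

```
/* Sketch: the one-pole DC tracker used by lpcnet_plc.c, in isolation. */
#include <math.h>

#define DC_CONST 0.003   /* same constant as in lpcnet_plc.c */

typedef struct { double dc_mem; } dc_tracker;

/* Subtract the slowly varying DC estimate from a frame in place. */
static void dc_remove(dc_tracker *t, short *pcm, int n) {
  int i;
  for (i = 0; i < n; i++) {
    int lp = (int)floor(.5 + t->dc_mem);            /* rounded current DC estimate */
    t->dc_mem += DC_CONST * (pcm[i] - t->dc_mem);   /* one-pole update */
    pcm[i] -= lp;
  }
}
```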
dnn/lpcnet_private.h

@@ -6,6 +6,7 @@
#include "freq.h"
#include "lpcnet.h"
#include "nnet_data.h"
#include "plc_data.h"
#include "kiss99.h"

#define BITS_PER_CHAR 8

@@ -61,6 +62,7 @@ struct LPCNetEncState{
float features[4][NB_TOTAL_FEATURES];
float sig_mem[LPC_ORDER];
int exc_mem;
float burg_cepstrum[2*NB_BANDS];
};

#define PLC_BUF_SIZE (FEATURES_DELAY*FRAME_SIZE + TRAINING_OFFSET)

@@ -72,6 +74,18 @@ struct LPCNetPLCState {
int skip_analysis;
int blend;
float features[NB_TOTAL_FEATURES];
int loss_count;
PLCNetState plc_net;
PLCNetState plc_copy;
int enable_blending;
int non_causal;
double dc_mem;
double syn_dc;
int remove_dc;

short dc_buf[TRAINING_OFFSET];
int queued_update;
short queued_samples[FRAME_SIZE];
};

extern float ceps_codebook1[];
dnn/nnet.c

@@ -38,6 +38,7 @@
#include "tansig_table.h"
#include "nnet.h"
#include "nnet_data.h"
#include "plc_data.h"

#ifdef NO_OPTIMIZATIONS
#warning Compiling without any vectorization. This code will be very slow

@@ -315,13 +316,15 @@ void compute_gru2(const GRULayer *gru, float *state, const float *input)
state[i] = h[i];
}

#define MAX_RNN_NEURONS_ALL IMAX(MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS)

void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input)
{
int i;
int N, M;
int stride;
float zrh[3*MAX_RNN_NEURONS];
float recur[3*MAX_RNN_NEURONS];
float zrh[3*MAX_RNN_NEURONS_ALL];
float recur[3*MAX_RNN_NEURONS_ALL];
float *z;
float *r;
float *h;

@@ -330,7 +333,7 @@ void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *stat
z = zrh;
r = &zrh[N];
h = &zrh[2*N];
celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS);
celt_assert(gru->nb_neurons <= MAX_RNN_NEURONS_ALL);
celt_assert(input != state);
celt_assert(gru->reset_after);
stride = 3*N;
265
dnn/training_tf2/dump_plc.py
Executable file
265
dnn/training_tf2/dump_plc.py
Executable file
|
@ -0,0 +1,265 @@
|
|||
#!/usr/bin/python3
|
||||
'''Copyright (c) 2021-2022 Amazon
|
||||
Copyright (c) 2017-2018 Mozilla
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
'''
|
||||
|
||||
import lpcnet_plc
|
||||
import sys
|
||||
import numpy as np
|
||||
from tensorflow.keras.optimizers import Adam
|
||||
from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
|
||||
import h5py
|
||||
import re
|
||||
|
||||
# Flag for dumping e2e (differentiable lpc) network weights
|
||||
flag_e2e = False
|
||||
|
||||
max_rnn_neurons = 1
|
||||
max_conv_inputs = 1
|
||||
|
||||
def printVector(f, vector, name, dtype='float', dotp=False):
|
||||
if dotp:
|
||||
vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
|
||||
vector = vector.transpose((2, 0, 3, 1))
|
||||
v = np.reshape(vector, (-1));
|
||||
#print('static const float ', name, '[', len(v), '] = \n', file=f)
|
||||
f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
|
||||
for i in range(0, len(v)):
|
||||
f.write('{}'.format(v[i]))
|
||||
if (i!=len(v)-1):
|
||||
f.write(',')
|
||||
else:
|
||||
break;
|
||||
if (i%8==7):
|
||||
f.write("\n ")
|
||||
else:
|
||||
f.write(" ")
|
||||
#print(v, file=f)
|
||||
f.write('\n};\n\n')
|
||||
return;
|
||||
|
||||
def printSparseVector(f, A, name, have_diag=True):
|
||||
N = A.shape[0]
|
||||
M = A.shape[1]
|
||||
W = np.zeros((0,), dtype='int')
|
||||
W0 = np.zeros((0,))
|
||||
if have_diag:
|
||||
diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
|
||||
A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
|
||||
A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
|
||||
A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
|
||||
printVector(f, diag, name + '_diag')
|
||||
AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
|
||||
idx = np.zeros((0,), dtype='int')
|
||||
for i in range(M//8):
|
||||
pos = idx.shape[0]
|
||||
idx = np.append(idx, -1)
|
||||
nb_nonzero = 0
|
||||
for j in range(N//4):
|
||||
block = A[j*4:(j+1)*4, i*8:(i+1)*8]
|
||||
qblock = AQ[j*4:(j+1)*4, i*8:(i+1)*8]
|
||||
if np.sum(np.abs(block)) > 1e-10:
|
||||
nb_nonzero = nb_nonzero + 1
|
||||
idx = np.append(idx, j*4)
|
||||
vblock = qblock.transpose((1,0)).reshape((-1,))
|
||||
W0 = np.concatenate([W0, block.reshape((-1,))])
|
||||
W = np.concatenate([W, vblock])
|
||||
idx[pos] = nb_nonzero
|
||||
f.write('#ifdef DOT_PROD\n')
|
||||
printVector(f, W, name, dtype='qweight')
|
||||
f.write('#else /*DOT_PROD*/\n')
|
||||
printVector(f, W0, name, dtype='qweight')
|
||||
f.write('#endif /*DOT_PROD*/\n')
|
||||
#idx = np.tile(np.concatenate([np.array([N]), np.arange(N)]), 3*N//16)
|
||||
printVector(f, idx, name + '_idx', dtype='int')
|
||||
return AQ
|
||||
|
||||
def dump_layer_ignore(self, f, hf):
|
||||
print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
|
||||
return False
|
||||
Layer.dump_layer = dump_layer_ignore
|
||||
|
||||
def dump_sparse_gru(self, f, hf):
|
||||
global max_rnn_neurons
|
||||
name = 'sparse_' + self.name
|
||||
print("printing layer " + name + " of type sparse " + self.__class__.__name__)
|
||||
weights = self.get_weights()
|
||||
qweights = printSparseVector(f, weights[1], name + '_recurrent_weights')
|
||||
printVector(f, weights[-1], name + '_bias')
|
||||
subias = weights[-1].copy()
|
||||
subias[1,:] = subias[1,:] - np.sum(qweights*(1./128),axis=0)
|
||||
printVector(f, subias, name + '_subias')
|
||||
if hasattr(self, 'activation'):
|
||||
activation = self.activation.__name__.upper()
|
||||
else:
|
||||
activation = 'TANH'
|
||||
if hasattr(self, 'reset_after') and not self.reset_after:
|
||||
reset_after = 0
|
||||
else:
|
||||
reset_after = 1
|
||||
neurons = weights[0].shape[1]//3
|
||||
max_rnn_neurons = max(max_rnn_neurons, neurons)
|
||||
f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
|
||||
.format(name, name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
|
||||
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
|
||||
hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
|
||||
hf.write('extern const SparseGRULayer {};\n\n'.format(name));
|
||||
return True
|
||||
|
||||
def dump_gru_layer(self, f, hf):
|
||||
global max_rnn_neurons
|
||||
name = self.name
|
||||
print("printing layer " + name + " of type " + self.__class__.__name__)
|
||||
weights = self.get_weights()
|
||||
qweight = printSparseVector(f, weights[0], name + '_weights', have_diag=False)
|
||||
|
||||
f.write('#ifdef DOT_PROD\n')
|
||||
qweight2 = np.clip(np.round(128.*weights[1]).astype('int'), -128, 127)
|
||||
printVector(f, qweight2, name + '_recurrent_weights', dotp=True, dtype='qweight')
|
||||
f.write('#else /*DOT_PROD*/\n')
|
||||
printVector(f, weights[1], name + '_recurrent_weights')
|
||||
f.write('#endif /*DOT_PROD*/\n')
|
||||
|
||||
printVector(f, weights[-1], name + '_bias')
|
||||
subias = weights[-1].copy()
|
||||
subias[0,:] = subias[0,:] - np.sum(qweight*(1./128.),axis=0)
|
||||
subias[1,:] = subias[1,:] - np.sum(qweight2*(1./128.),axis=0)
|
||||
printVector(f, subias, name + '_subias')
|
||||
if hasattr(self, 'activation'):
|
||||
activation = self.activation.__name__.upper()
|
||||
else:
|
||||
activation = 'TANH'
|
||||
if hasattr(self, 'reset_after') and not self.reset_after:
|
||||
reset_after = 0
|
||||
else:
|
||||
reset_after = 1
|
||||
neurons = weights[0].shape[1]//3
|
||||
max_rnn_neurons = max(max_rnn_neurons, neurons)
|
||||
f.write('const GRULayer {} = {{\n {}_bias,\n {}_subias,\n {}_weights,\n {}_weights_idx,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
|
||||
.format(name, name, name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
|
||||
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
|
||||
hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
|
||||
hf.write('extern const GRULayer {};\n\n'.format(name));
|
||||
return True
|
||||
GRU.dump_layer = dump_gru_layer
|
||||
|
||||
def dump_gru_layer_dummy(self, f, hf):
|
||||
name = self.name
|
||||
weights = self.get_weights()
|
||||
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
|
||||
hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
|
||||
return True;
|
||||
|
||||
#GRU.dump_layer = dump_gru_layer_dummy
|
||||
|
||||
def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
|
||||
printVector(f, weights, name + '_weights')
|
||||
printVector(f, bias, name + '_bias')
|
||||
f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
|
||||
.format(name, name, name, weights.shape[0], weights.shape[1], activation))
|
||||
hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
|
||||
hf.write('extern const DenseLayer {};\n\n'.format(name));
|
||||
|
||||
def dump_dense_layer(self, f, hf):
|
||||
name = self.name
|
||||
print("printing layer " + name + " of type " + self.__class__.__name__)
|
||||
weights = self.get_weights()
|
||||
activation = self.activation.__name__.upper()
|
||||
dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
|
||||
return False
|
||||
|
||||
Dense.dump_layer = dump_dense_layer
|
||||
|
||||
def dump_conv1d_layer(self, f, hf):
    global max_conv_inputs
    name = self.name
    print("printing layer " + name + " of type " + self.__class__.__name__)
    weights = self.get_weights()
    printVector(f, weights[0], name + '_weights')
    printVector(f, weights[-1], name + '_bias')
    activation = self.activation.__name__.upper()
    max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
    f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
    hf.write('extern const Conv1DLayer {};\n\n'.format(name));
    return True
Conv1D.dump_layer = dump_conv1d_layer


filename = sys.argv[1]
with h5py.File(filename, "r") as f:
    units = min(f['model_weights']['plc_gru1']['plc_gru1']['recurrent_kernel:0'].shape)
    units2 = min(f['model_weights']['plc_gru2']['plc_gru2']['recurrent_kernel:0'].shape)
    cond_size = f['model_weights']['plc_dense1']['plc_dense1']['kernel:0'].shape[1]

model = lpcnet_plc.new_lpcnet_plc_model(rnn_units=units, cond_size=cond_size)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
#model.summary()

model.load_weights(filename, by_name=True)

if len(sys.argv) > 2:
    cfile = sys.argv[2];
    hfile = sys.argv[3];
else:
    cfile = 'plc_data.c'
    hfile = 'plc_data.h'

f = open(cfile, 'w')
hf = open(hfile, 'w')

f.write('/*This file is automatically generated from a Keras model*/\n')
f.write('/*based on model {}*/\n\n'.format(sys.argv[1]))
f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))

hf.write('/*This file is automatically generated from a Keras model*/\n\n')
hf.write('#ifndef PLC_DATA_H\n#define PLC_DATA_H\n\n#include "nnet.h"\n\n')

layer_list = []
for i, layer in enumerate(model.layers):
    if layer.dump_layer(f, hf):
        layer_list.append(layer.name)

#dump_sparse_gru(model.get_layer('gru_a'), f, hf)

hf.write('#define PLC_MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
#hf.write('#define PLC_MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))

hf.write('typedef struct {\n')
for i, name in enumerate(layer_list):
    hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
hf.write('} PLCNetState;\n')

hf.write('\n\n#endif\n')

f.close()
hf.close()
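The sizes that end up in plc_data.h come directly from the Keras weight shapes gathered above. The short sketch below is illustrative only (hypothetical shapes matching the default model, not part of the commit): a reset-after GRU stores its kernel as (input_dim, 3*units), which is why the dumper recovers the unit count as `weights[0].shape[1]//3`.

```
# Illustrative only: how dump_gru_layer derives the sizes written to plc_data.h.
# Shapes are hypothetical, matching the default model (cond_size=128, 256 GRU units).
import numpy as np

input_dim, units = 128, 256
kernel = np.zeros((input_dim, 3 * units), dtype=np.float32)        # Keras GRU kernel
recurrent_kernel = np.zeros((units, 3 * units), dtype=np.float32)  # recurrent kernel
bias = np.zeros((2, 3 * units), dtype=np.float32)                  # reset-after GRUs carry two bias rows

print(kernel.shape[1] // 3)   # 256 -> the {NAME}_OUT_SIZE / {NAME}_STATE_SIZE macros
print(kernel.shape[0])        # 128 -> the input size written into the GRULayer initializer
# The '_subias' arrays dumped above are the bias rows minus the column sums of the
# 1/128-scaled quantized kernels, i.e. the correction used by the DOT_PROD code path.
```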
101 dnn/training_tf2/lpcnet_plc.py Normal file
@ -0,0 +1,101 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2018-2019 Mozilla

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

import math
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation, GaussianNoise
from tensorflow.compat.v1.keras.layers import CuDNNGRU
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.initializers import Initializer
from tensorflow.keras.callbacks import Callback
import numpy as np

def quant_regularizer(x):
    Q = 128
    Q_1 = 1./Q
    #return .01 * tf.reduce_mean(1 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))
    return .01 * tf.reduce_mean(K.sqrt(K.sqrt(1.0001 - tf.math.cos(2*3.1415926535897931*(Q*x-tf.round(Q*x))))))


class WeightClip(Constraint):
    '''Clips the weights incident to each hidden unit to be inside a range
    '''
    def __init__(self, c=2):
        self.c = c

    def __call__(self, p):
        # Ensure that abs of adjacent weights don't sum to more than 127. Otherwise there's a risk of
        # saturation when implementing dot products with SSSE3 or AVX2.
        return self.c*p/tf.maximum(self.c, tf.repeat(tf.abs(p[:, 1::2])+tf.abs(p[:, 0::2]), 2, axis=1))
        #return K.clip(p, -self.c, self.c)

    def get_config(self):
        return {'name': self.__class__.__name__,
                'c': self.c}

constraint = WeightClip(0.992)

def new_lpcnet_plc_model(rnn_units=256, nb_used_features=20, nb_burg_features=36, batch_size=128, training=False, adaptation=False, quantize=False, cond_size=128):
    feat = Input(shape=(None, nb_used_features+nb_burg_features), batch_size=batch_size)
    lost = Input(shape=(None, 1), batch_size=batch_size)

    fdense1 = Dense(cond_size, activation='tanh', name='plc_dense1')

    cfeat = Concatenate()([feat, lost])
    cfeat = fdense1(cfeat)
    #cfeat = Conv1D(cond_size, 3, padding='causal', activation='tanh', name='plc_conv1')(cfeat)

    quant = quant_regularizer if quantize else None

    if training:
        rnn = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru1', stateful=True,
                       kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
        rnn2 = CuDNNGRU(rnn_units, return_sequences=True, return_state=True, name='plc_gru2', stateful=True,
                        kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
    else:
        rnn = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='plc_gru1', stateful=True,
                  kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)
        rnn2 = GRU(rnn_units, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after='true', name='plc_gru2', stateful=True,
                   kernel_constraint=constraint, recurrent_constraint = constraint, kernel_regularizer=quant, recurrent_regularizer=quant)

    gru_out1, _ = rnn(cfeat)
    gru_out1 = GaussianNoise(.005)(gru_out1)
    gru_out2, _ = rnn2(gru_out1)

    out_dense = Dense(nb_used_features, activation='linear', name='plc_out')
    plc_out = out_dense(gru_out2)

    model = Model([feat, lost], plc_out)
    model.rnn_units = rnn_units
    model.cond_size = cond_size
    model.nb_used_features = nb_used_features
    model.nb_burg_features = nb_burg_features

    return model
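A minimal usage sketch for the model defined above (synthetic inputs, not part of the commit): each frame carries 56 inputs (20 regular features plus 36 Burg features) together with a per-frame flag (1 = frame received, 0 = lost, following the masking used in plc_loader.py and test_plc.py), and the network predicts the 20 clean features.

```
# Minimal sketch, assuming lpcnet_plc.py above is importable as lpcnet_plc.
import numpy as np
import lpcnet_plc

model = lpcnet_plc.new_lpcnet_plc_model(rnn_units=256, batch_size=1, training=False, cond_size=128)

seq_len = 50
feat = np.random.randn(1, seq_len, 20 + 36).astype('float32')    # 56 per-frame inputs
lost = (np.random.rand(1, seq_len, 1) > 0.1).astype('float32')   # 1 = received, 0 = lost

# Lost frames are zeroed on input, exactly as in test_plc.py.
pred = model.predict([feat * lost, lost])
print(pred.shape)   # (1, 50, 20): predicted clean features for every frame
```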
67 dnn/training_tf2/plc_loader.py Normal file
@ -0,0 +1,67 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

import numpy as np
from tensorflow.keras.utils import Sequence

class PLCLoader(Sequence):
    def __init__(self, features, lost, nb_burg_features, batch_size):
        self.batch_size = batch_size
        self.nb_batches = features.shape[0]//self.batch_size
        self.features = features[:self.nb_batches*self.batch_size, :, :]
        self.lost = lost.astype('float')
        self.lost = self.lost[:(len(self.lost)//features.shape[1]-1)*features.shape[1]]
        self.nb_burg_features = nb_burg_features
        self.on_epoch_end()

    def on_epoch_end(self):
        self.indices = np.arange(self.nb_batches*self.batch_size)
        np.random.shuffle(self.indices)
        offset = np.random.randint(0, high=self.features.shape[1])
        self.lost_offset = np.reshape(self.lost[offset:-self.features.shape[1]+offset], (-1, self.features.shape[1]))
        self.lost_indices = np.random.randint(0, high=self.lost_offset.shape[0], size=self.nb_batches*self.batch_size)

    def __getitem__(self, index):
        features = self.features[self.indices[index*self.batch_size:(index+1)*self.batch_size], :, :]
        #lost = (np.random.rand(features.shape[0], features.shape[1]) > .2).astype('float')
        lost = self.lost_offset[self.lost_indices[index*self.batch_size:(index+1)*self.batch_size], :]
        lost = np.reshape(lost, (features.shape[0], features.shape[1], 1))
        lost_mask = np.tile(lost, (1,1,features.shape[2]))
        in_features = features*lost_mask

        #For the first frame after a loss, we don't have valid features, but the Burg estimate is valid.
        in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:]
        out_lost = np.copy(lost)
        out_lost[:,1:,:] = out_lost[:,1:,:]*out_lost[:,:-1,:]

        out_features = np.concatenate([features[:,:,self.nb_burg_features:], 1.-out_lost], axis=-1)
        inputs = [in_features*lost_mask, lost]
        outputs = [out_features]
        return (inputs, outputs)

    def __len__(self):
        return self.nb_batches
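The loader above draws random windows from a long packet-loss trace and applies them as masks to the feature sequences. A small synthetic sketch (not part of the commit) of what one batch looks like:

```
# Synthetic sketch of a PLCLoader batch, assuming plc_loader.py above is importable.
import numpy as np
from plc_loader import PLCLoader

batch_size, seq_len = 4, 100
nb_burg_features, nb_used_features = 36, 20

features = np.random.randn(8 * batch_size, seq_len, nb_burg_features + nb_used_features).astype('float32')
lost = (np.random.rand(10 * seq_len) > 0.2).astype('int8')   # loss trace: 1 = received, 0 = lost

loader = PLCLoader(features, lost, nb_burg_features, batch_size)
inputs, outputs = loader[0]
in_features, lost_flags = inputs
print(in_features.shape)   # (4, 100, 56): features, zeroed on lost frames
print(lost_flags.shape)    # (4, 100, 1):  per-frame availability flag
print(outputs[0].shape)    # (4, 100, 21): 20 target features plus a loss-mask column
# The trailing column is 1 where the current or previous frame was lost; the masked
# losses in train_plc.py use it to score only the frames the PLC has to fill in.
```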
92 dnn/training_tf2/test_plc.py Normal file
@ -0,0 +1,92 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2018-2019 Mozilla

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

# Test a PLC model

import argparse
from plc_loader import PLCLoader

parser = argparse.ArgumentParser(description='Test a PLC model')

parser.add_argument('weights', metavar='<weights file>', help='weights file (.h5)')
parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('output', metavar='<output>', help='reconstructed file (float32)')
parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()

parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')

args = parser.parse_args()

import importlib
lpcnet = importlib.import_module(args.model)

import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py

import tensorflow as tf
#gpus = tf.config.experimental.list_physical_devices('GPU')
#if gpus:
#    try:
#        tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
#    except RuntimeError as e:
#        print(e)

model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=1, training=False, quantize=False, cond_size=args.cond_size)
model.compile()

lpc_order = 16

feature_file = args.features
nb_features = model.nb_used_features + lpc_order
nb_used_features = model.nb_used_features

# Load the text feature matrix (one frame per row); the last column is the frame-received flag.

features = np.loadtxt(feature_file)
print(features.shape)
sequence_size = features.shape[0]
lost = np.reshape(features[:,-1:], (1, sequence_size, 1))
features = features[:,:nb_used_features]
features = np.reshape(features, (1, sequence_size, nb_used_features))

model.load_weights(args.weights)

features = features*lost
out = model.predict([features, lost])

out = features + (1-lost)*out

np.savetxt(args.output, out[0,:,:])
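The final merge in the script above keeps received features untouched and substitutes the prediction only on lost frames. A tiny numeric example (synthetic values, not from the commit):

```
# out = features + (1 - lost) * prediction: received frames pass through, lost frames
# take the model output. Synthetic values for illustration.
import numpy as np

features = np.array([[1.0, 2.0], [0.0, 0.0], [3.0, 4.0]])   # frame 1 was lost, so it is zeroed
lost     = np.array([[1.0], [0.0], [1.0]])                  # 1 = received, 0 = lost
pred     = np.array([[9.0, 9.0], [5.0, 6.0], [9.0, 9.0]])   # model output

out = features + (1 - lost) * pred
print(out)   # [[1. 2.] [5. 6.] [3. 4.]]
```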
197 dnn/training_tf2/train_plc.py Normal file
@ -0,0 +1,197 @@
#!/usr/bin/python3
'''Copyright (c) 2021-2022 Amazon
   Copyright (c) 2018-2019 Mozilla

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

# Train a PLC model

import argparse
from plc_loader import PLCLoader

parser = argparse.ArgumentParser(description='Train a PLC model')

parser.add_argument('features', metavar='<features file>', help='binary features file (float32)')
parser.add_argument('lost_file', metavar='<packet loss file>', help='packet loss traces (int8)')
parser.add_argument('output', metavar='<output>', help='trained model file (.h5)')
parser.add_argument('--model', metavar='<model>', default='lpcnet_plc', help='PLC model python definition (without .py)')
group1 = parser.add_mutually_exclusive_group()
group1.add_argument('--quantize', metavar='<input weights>', help='quantize model')
group1.add_argument('--retrain', metavar='<input weights>', help='continue training model')
parser.add_argument('--gru-size', metavar='<units>', default=256, type=int, help='number of units in GRU (default 256)')
parser.add_argument('--cond-size', metavar='<units>', default=128, type=int, help='number of units in conditioning network (default 128)')
parser.add_argument('--epochs', metavar='<epochs>', default=120, type=int, help='number of epochs to train for (default 120)')
parser.add_argument('--batch-size', metavar='<batch size>', default=128, type=int, help='batch size to use (default 128)')
parser.add_argument('--seq-length', metavar='<sequence length>', default=1000, type=int, help='sequence length to use (default 1000)')
parser.add_argument('--lr', metavar='<learning rate>', type=float, help='learning rate')
parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rate decay')
parser.add_argument('--band-loss', metavar='<weight>', default=1.0, type=float, help='weight of band loss (default 1.0)')
parser.add_argument('--loss-bias', metavar='<bias>', default=0.0, type=float, help='loss bias towards low energy (default 0.0)')
parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')

args = parser.parse_args()

import importlib
lpcnet = importlib.import_module(args.model)

import sys
import numpy as np
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
import tensorflow.keras.backend as K
import h5py

import tensorflow as tf
#gpus = tf.config.experimental.list_physical_devices('GPU')
#if gpus:
#    try:
#        tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
#    except RuntimeError as e:
#        print(e)

nb_epochs = args.epochs

# Try reducing batch_size if you run out of memory on your GPU
batch_size = args.batch_size

quantize = args.quantize is not None
retrain = args.retrain is not None

if quantize:
    lr = 0.00003
    decay = 0
    input_model = args.quantize
else:
    lr = 0.001
    decay = 2.5e-5

if args.lr is not None:
    lr = args.lr

if args.decay is not None:
    decay = args.decay

if retrain:
    input_model = args.retrain

def plc_loss(alpha=1.0, bias=0.):
    def loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')
        bias_mask = K.minimum(1., K.maximum(0., 4*y_true[:,:,-1:]))
        l1_loss = K.mean(K.abs(e)) + 0.1*K.mean(K.maximum(0., -e[:,:,-1:])) + alpha*K.mean(K.abs(e_bands) + bias*bias_mask*K.maximum(0., e_bands)) + K.mean(K.minimum(K.abs(e[:,:,18:19]),1.)) + 8*K.mean(K.minimum(K.abs(e[:,:,18:19]),.4))
        return l1_loss
    return loss

def plc_l1_loss():
    def L1_loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        l1_loss = K.mean(K.abs(e))
        return l1_loss
    return L1_loss

def plc_ceps_loss():
    def ceps_loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        l1_loss = K.mean(K.abs(e[:,:,:-2]))
        return l1_loss
    return ceps_loss

def plc_band_loss():
    def L1_band_loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        e_bands = tf.signal.idct(e[:,:,:-2], norm='ortho')
        l1_loss = K.mean(K.abs(e_bands))
        return l1_loss
    return L1_band_loss

def plc_pitch_loss():
    def pitch_loss(y_true,y_pred):
        mask = y_true[:,:,-1:]
        y_true = y_true[:,:,:-1]
        e = (y_pred - y_true)*mask
        l1_loss = K.mean(K.minimum(K.abs(e[:,:,18:19]),.4))
        return l1_loss
    return pitch_loss

opt = Adam(lr, decay=decay, beta_2=0.99)
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

with strategy.scope():
    model = lpcnet.new_lpcnet_plc_model(rnn_units=args.gru_size, batch_size=batch_size, training=True, quantize=quantize, cond_size=args.cond_size)
    model.compile(optimizer=opt, loss=plc_loss(alpha=args.band_loss, bias=args.loss_bias), metrics=[plc_l1_loss(), plc_ceps_loss(), plc_band_loss(), plc_pitch_loss()])
    model.summary()

lpc_order = 16

feature_file = args.features
nb_features = model.nb_used_features + lpc_order + model.nb_burg_features
nb_used_features = model.nb_used_features
nb_burg_features = model.nb_burg_features
sequence_size = args.seq_length

# Load the binary float32 feature file; the packet-loss trace is loaded separately below.

features = np.memmap(feature_file, dtype='float32', mode='r')
nb_sequences = len(features)//(nb_features*sequence_size)//batch_size*batch_size
features = features[:nb_sequences*sequence_size*nb_features]

features = np.reshape(features, (nb_sequences, sequence_size, nb_features))

features = features[:, :, :nb_used_features+model.nb_burg_features]

lost = np.memmap(args.lost_file, dtype='int8', mode='r')

# dump models to disk as we go
checkpoint = ModelCheckpoint('{}_{}_{}.h5'.format(args.output, args.gru_size, '{epoch:02d}'))

if args.retrain is not None:
    model.load_weights(args.retrain)

if quantize or retrain:
    #Adapting from an existing model
    model.load_weights(input_model)

model.save_weights('{}_{}_initial.h5'.format(args.output, args.gru_size))

loader = PLCLoader(features, lost, nb_burg_features, batch_size)

callbacks = [checkpoint]
if args.logdir is not None:
    logdir = '{}/{}_{}_logs'.format(args.logdir, args.output, args.gru_size)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
    callbacks.append(tensorboard_callback)

model.fit(loader, epochs=nb_epochs, validation_split=0.0, callbacks=callbacks)
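The losses above all multiply the error by the mask carried in the last channel of y_true, so only the frames the PLC has to fill in contribute. A hedged sketch with synthetic tensors (assuming plc_loss from the script above is in scope, and the usual LPCNet feature layout of 18 cepstral coefficients followed by pitch and correlation):

```
# Evaluate the masked PLC loss on synthetic tensors (illustration only).
import tensorflow as tf

loss_fn = plc_loss(alpha=1.0, bias=0.)                              # defined in train_plc.py above
y_pred = tf.random.normal((2, 10, 20))                              # network output
feats  = tf.random.normal((2, 10, 20))                              # target features
mask   = tf.cast(tf.random.uniform((2, 10, 1)) > 0.7, tf.float32)   # 1 = concealment frame
y_true = tf.concat([feats, mask], axis=-1)                          # mask rides in the last channel

print(float(loss_fn(y_true, y_pred)))   # error is only counted where mask == 1
```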