From ddd5669e79a9e581e8420d2ed397e524da864337 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin
Date: Sat, 28 Oct 2023 23:33:47 -0400
Subject: [PATCH] Pitch and FARGAN model updates

Removing one of the 2d conv layers for pitch estimation reduces
complexity without noticeable degradation. The FARGAN model has more
adversarial training. Also, there is no need for double precision in
the low-pass filter.
---
 autogen.sh                                           | 2 +-
 dnn/lpcnet_enc.c                                     | 4 ++--
 dnn/pitchdnn.c                                       | 3 +--
 dnn/torch/neural-pitch/export_neuralpitch_weights.py | 3 +--
 dnn/torch/neural-pitch/models.py                     | 7 ++-----
 5 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/autogen.sh b/autogen.sh
index 7f83b4b1..cc70a11a 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -9,7 +9,7 @@ set -e
 
 srcdir=`dirname $0`
 test -n "$srcdir" && cd "$srcdir"
 
-dnn/download_model.sh 290be25
+dnn/download_model.sh c99054d
 
 echo "Updating build configuration files, please wait...."
diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c
index 6a4674eb..7133357f 100644
--- a/dnn/lpcnet_enc.c
+++ b/dnn/lpcnet_enc.c
@@ -81,8 +81,8 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const
       float xi, yi;
       xi = x[i];
       yi = x[i] + mem[0];
-      mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
-      mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
+      mem[0] = mem[1] + (b[0]*xi - a[0]*yi);
+      mem[1] = (b[1]*xi - a[1]*yi);
       y[i] = yi;
    }
 }
diff --git a/dnn/pitchdnn.c b/dnn/pitchdnn.c
index 02c67444..84952721 100644
--- a/dnn/pitchdnn.c
+++ b/dnn/pitchdnn.c
@@ -33,8 +33,7 @@ float compute_pitchdnn(
    /* xcorr*/
    OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
    compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
-   compute_conv2d(&model->conv2d_2, &conv1_tmp1[1], st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
-   compute_conv2d(&model->conv2d_3, downsampler_in, st->xcorr_mem3, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
+   compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
 
    compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
    compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
diff --git a/dnn/torch/neural-pitch/export_neuralpitch_weights.py b/dnn/torch/neural-pitch/export_neuralpitch_weights.py
index 82b2d3d9..577ec882 100644
--- a/dnn/torch/neural-pitch/export_neuralpitch_weights.py
+++ b/dnn/torch/neural-pitch/export_neuralpitch_weights.py
@@ -73,8 +73,7 @@ f"""
 
 
     conv_layers = [
         ('conv.1', "conv2d_1"),
-        ('conv.4', "conv2d_2"),
-        ('conv.7', "conv2d_3")
+        ('conv.4', "conv2d_2")
     ]
 
diff --git a/dnn/torch/neural-pitch/models.py b/dnn/torch/neural-pitch/models.py
index 34b418e8..ce4977fd 100644
--- a/dnn/torch/neural-pitch/models.py
+++ b/dnn/torch/neural-pitch/models.py
@@ -86,13 +86,10 @@ class PitchDNN(torch.nn.Module):
 
         self.conv = torch.nn.Sequential(
             torch.nn.ZeroPad2d((2,0,1,1)),
-            torch.nn.Conv2d(1, 8, 3, bias=True),
+            torch.nn.Conv2d(1, 4, 3, bias=True),
             self.activation,
             torch.nn.ZeroPad2d((2,0,1,1)),
-            torch.nn.Conv2d(8, 8, 3, bias=True),
-            self.activation,
-            torch.nn.ZeroPad2d((2,0,1,1)),
-            torch.nn.Conv2d(8, 1, 3, bias=True),
+            torch.nn.Conv2d(4, 1, 3, bias=True),
             self.activation,
         )
 
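
Notes for review (illustrative sketches, not part of the patch):

The PitchDNN xcorr front-end goes from three 3x3 Conv2d layers
(1->8->8->1 channels) to two (1->4->1). A minimal PyTorch sketch of the
before/after stacks, reusing the padding and kernel size from models.py;
the input shape is an illustrative guess, not the real NB_XCORR_FEATURES
geometry:

    import torch

    def conv_stack(channels):
        # Each stage mirrors models.py: ZeroPad2d((2,0,1,1)) + 3x3 Conv2d + tanh.
        layers = []
        for c_in, c_out in zip(channels[:-1], channels[1:]):
            layers += [torch.nn.ZeroPad2d((2, 0, 1, 1)),
                       torch.nn.Conv2d(c_in, c_out, 3, bias=True),
                       torch.nn.Tanh()]
        return torch.nn.Sequential(*layers)

    old = conv_stack([1, 8, 8, 1])  # stack before this patch
    new = conv_stack([1, 4, 1])     # stack after this patch

    x = torch.zeros(1, 1, 4, 90)    # (batch, chan, features, time); sizes are guesses
    assert old(x).shape == new(x).shape == x.shape  # both stacks preserve shape
    print(sum(p.numel() for p in old.parameters()),   # 737 weights/biases before
          sum(p.numel() for p in new.parameters()))   # 77 after, roughly 10x fewer

The per-frame multiply count shrinks by about the same factor, which is
where the complexity saving in the commit message comes from.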
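Dropping the (double) casts in biquad() relies on the filter being a
stable second-order section whose state already lives in float. A NumPy
sketch of the same transposed direct-form II recursion run in float32
and float64; the coefficients below are stable placeholders, not the
values lpcnet_enc.c actually uses:

    import numpy as np

    def biquad(x, b, a, dtype):
        # Same update as biquad() in dnn/lpcnet_enc.c (first feedforward tap is 1).
        b = np.asarray(b, dtype=dtype)
        a = np.asarray(a, dtype=dtype)
        mem = np.zeros(2, dtype=dtype)
        y = np.empty_like(x, dtype=dtype)
        for i, xi in enumerate(x.astype(dtype)):
            yi = xi + mem[0]
            mem[0] = mem[1] + (b[0]*xi - a[0]*yi)
            mem[1] = b[1]*xi - a[1]*yi
            y[i] = yi
        return y

    x = np.random.default_rng(0).standard_normal(16000).astype(np.float32)
    b, a = [-1.8, 0.81], [-1.6, 0.64]   # placeholder taps, both poles at z = 0.8
    err = np.abs(biquad(x, b, a, np.float32) - biquad(x, b, a, np.float64)).max()
    print(err)  # tiny deviation: float accumulation is enough here

Because the poles stay well inside the unit circle, float32 rounding
errors decay instead of accumulating, so the double-precision multiplies
were buying nothing.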
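On the C side, each compute_conv2d() call keeps a short history (the
st->xcorr_mem* buffers) so a kernel spanning three frames can run one
frame at a time; dropping conv2d_3 also drops xcorr_mem3. A toy NumPy
sketch of that streaming pattern, with hypothetical shapes and a scalar
tap per time step, not the real compute_conv2d API:

    import numpy as np

    def streaming_conv(kernel, mem, frame):
        # 3-tap causal conv over time, one frame per call; mem = 2 previous frames.
        hist = np.concatenate([mem, frame[None, :]])       # (3, n_features)
        out = np.tensordot(kernel, hist, axes=([0], [0]))  # weighted sum over time
        mem[:] = hist[1:]                                  # shift history for next call
        return out

    rng = np.random.default_rng(0)
    kernel = rng.standard_normal(3)        # one scalar tap per time step (toy)
    frames = rng.standard_normal((5, 90))  # 5 frames of 90 illustrative features
    mem = np.zeros((2, 90))                # the xcorr_mem-style per-layer state

    stream = np.stack([streaming_conv(kernel, mem, f) for f in frames])
    padded = np.concatenate([np.zeros((2, 90)), frames])   # batch run, zero history
    batch = np.stack([np.tensordot(kernel, padded[t:t+3], axes=([0], [0]))
                      for t in range(5)])
    assert np.allclose(stream, batch)      # frame-by-frame matches the batch conv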