Pitch and fargan model updates
Removing one of the 2D conv layers for pitch estimation reduces complexity without noticeable degradation. The FARGAN model now has more adversarial training. Also, double precision is not needed in the low-pass filter.
This commit is contained in:
parent
c99054dad9
commit
ddd5669e79
5 changed files with 7 additions and 12 deletions
|
@ -9,7 +9,7 @@ set -e
|
||||||
srcdir=`dirname $0`
|
srcdir=`dirname $0`
|
||||||
test -n "$srcdir" && cd "$srcdir"
|
test -n "$srcdir" && cd "$srcdir"
|
||||||
|
|
||||||
dnn/download_model.sh 290be25
|
dnn/download_model.sh c99054d
|
||||||
|
|
||||||
echo "Updating build configuration files, please wait...."
|
echo "Updating build configuration files, please wait...."
|
||||||
|
|
||||||
|
|
|
@ -81,8 +81,8 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const
|
||||||
float xi, yi;
|
float xi, yi;
|
||||||
xi = x[i];
|
xi = x[i];
|
||||||
yi = x[i] + mem[0];
|
yi = x[i] + mem[0];
|
||||||
mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
|
mem[0] = mem[1] + (b[0]*xi - a[0]*yi);
|
||||||
mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
|
mem[1] = (b[1]*xi - a[1]*yi);
|
||||||
y[i] = yi;
|
y[i] = yi;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,8 +33,7 @@ float compute_pitchdnn(
|
||||||
/* xcorr*/
|
/* xcorr*/
|
||||||
OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
|
OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
|
||||||
compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
|
compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
|
||||||
compute_conv2d(&model->conv2d_2, &conv1_tmp1[1], st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
|
compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
|
||||||
compute_conv2d(&model->conv2d_3, downsampler_in, st->xcorr_mem3, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
|
|
||||||
|
|
||||||
compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
|
compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
|
||||||
compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
|
compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
|
||||||
|
|
|
@ -73,8 +73,7 @@ f"""
|
||||||
|
|
||||||
conv_layers = [
|
conv_layers = [
|
||||||
('conv.1', "conv2d_1"),
|
('conv.1', "conv2d_1"),
|
||||||
('conv.4', "conv2d_2"),
|
('conv.4', "conv2d_2")
|
||||||
('conv.7', "conv2d_3")
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -86,13 +86,10 @@ class PitchDNN(torch.nn.Module):
|
||||||
|
|
||||||
self.conv = torch.nn.Sequential(
|
self.conv = torch.nn.Sequential(
|
||||||
torch.nn.ZeroPad2d((2,0,1,1)),
|
torch.nn.ZeroPad2d((2,0,1,1)),
|
||||||
torch.nn.Conv2d(1, 8, 3, bias=True),
|
torch.nn.Conv2d(1, 4, 3, bias=True),
|
||||||
self.activation,
|
self.activation,
|
||||||
torch.nn.ZeroPad2d((2,0,1,1)),
|
torch.nn.ZeroPad2d((2,0,1,1)),
|
||||||
torch.nn.Conv2d(8, 8, 3, bias=True),
|
torch.nn.Conv2d(4, 1, 3, bias=True),
|
||||||
self.activation,
|
|
||||||
torch.nn.ZeroPad2d((2,0,1,1)),
|
|
||||||
torch.nn.Conv2d(8, 1, 3, bias=True),
|
|
||||||
self.activation,
|
self.activation,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue