Pitch and fargan model updates
Removing one of the 2D conv layers for pitch estimation reduces complexity without noticeable degradation. The FARGAN model now has more adversarial training. Also, double precision is not needed in the low-pass filter.
This commit is contained in:
parent
c99054dad9
commit
ddd5669e79
5 changed files with 7 additions and 12 deletions
|
@ -9,7 +9,7 @@ set -e
|
||||||
srcdir=`dirname $0`
|
srcdir=`dirname $0`
|
||||||
test -n "$srcdir" && cd "$srcdir"
|
test -n "$srcdir" && cd "$srcdir"
|
||||||
|
|
||||||
dnn/download_model.sh 290be25
|
dnn/download_model.sh c99054d
|
||||||
|
|
||||||
echo "Updating build configuration files, please wait...."
|
echo "Updating build configuration files, please wait...."
|
||||||
|
|
||||||
|
|
|
@ -81,8 +81,8 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const
|
||||||
float xi, yi;
|
float xi, yi;
|
||||||
xi = x[i];
|
xi = x[i];
|
||||||
yi = x[i] + mem[0];
|
yi = x[i] + mem[0];
|
||||||
mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
|
mem[0] = mem[1] + (b[0]*xi - a[0]*yi);
|
||||||
mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
|
mem[1] = (b[1]*xi - a[1]*yi);
|
||||||
y[i] = yi;
|
y[i] = yi;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,8 +33,7 @@ float compute_pitchdnn(
|
||||||
/* xcorr*/
|
/* xcorr*/
|
||||||
OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
|
OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
|
||||||
compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
|
compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
|
||||||
compute_conv2d(&model->conv2d_2, &conv1_tmp1[1], st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
|
compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
|
||||||
compute_conv2d(&model->conv2d_3, downsampler_in, st->xcorr_mem3, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
|
|
||||||
|
|
||||||
compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
|
compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
|
||||||
compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
|
compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
|
||||||
|
|
|
@ -73,8 +73,7 @@ f"""
|
||||||
|
|
||||||
conv_layers = [
|
conv_layers = [
|
||||||
('conv.1', "conv2d_1"),
|
('conv.1', "conv2d_1"),
|
||||||
('conv.4', "conv2d_2"),
|
('conv.4', "conv2d_2")
|
||||||
('conv.7', "conv2d_3")
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -86,13 +86,10 @@ class PitchDNN(torch.nn.Module):
|
||||||
|
|
||||||
self.conv = torch.nn.Sequential(
|
self.conv = torch.nn.Sequential(
|
||||||
torch.nn.ZeroPad2d((2,0,1,1)),
|
torch.nn.ZeroPad2d((2,0,1,1)),
|
||||||
torch.nn.Conv2d(1, 8, 3, bias=True),
|
torch.nn.Conv2d(1, 4, 3, bias=True),
|
||||||
self.activation,
|
self.activation,
|
||||||
torch.nn.ZeroPad2d((2,0,1,1)),
|
torch.nn.ZeroPad2d((2,0,1,1)),
|
||||||
torch.nn.Conv2d(8, 8, 3, bias=True),
|
torch.nn.Conv2d(4, 1, 3, bias=True),
|
||||||
self.activation,
|
|
||||||
torch.nn.ZeroPad2d((2,0,1,1)),
|
|
||||||
torch.nn.Conv2d(8, 1, 3, bias=True),
|
|
||||||
self.activation,
|
self.activation,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue