Merge LACE/NoLACE under OSCE framework

This commit is contained in:
Jan Buethe 2023-11-08 14:03:39 +01:00 committed by Jean-Marc Valin
parent 591c8bad70
commit 7d328f5bfa
No known key found for this signature in database
GPG key ID: 531A52533318F00A
49 changed files with 4061 additions and 103 deletions

View file

@ -29,6 +29,12 @@ jobs:
compiler: gcc,
buildconfig: --enable-assertions --enable-custom-modes
}
- {
name: "Linux/GCC/EnableDNN",
os: ubuntu-latest,
compiler: gcc,
buildconfig: --enable-assertions --enable-custom-modes --enable-dred --enable-osce
}
steps:
- uses: actions/checkout@v3
# No AutoMake on Mac so let's install it
@ -42,4 +48,4 @@ jobs:
- name: Build
run: make -j 2
- name: Test
run: make check -j 2
run: make check -j 2

View file

@ -74,7 +74,7 @@ jobs:
run: mkdir build
- name: Configure
working-directory: ./build
run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON
run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON
- name: Build
working-directory: ./build
run: cmake --build . -j 2 --config ${{ matrix.config.config }} --target package

View file

@ -64,9 +64,9 @@ autoconf:
- !reference [.snippets, git_prep]
script:
- ./autogen.sh
- CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx
- CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce
- make -j16
- DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16
- DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx --enable-dred --enable-osce CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16
cache:
paths:
- "src/*.o"
@ -87,7 +87,7 @@ cmake:
script:
- ./autogen.sh
- mkdir build
- cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_X86_PRESUME_AVX2=ON
- cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON -DOPUS_X86_PRESUME_AVX2=ON
- cmake --build build
- cd build && ctest --output-on-failure -j 16
@ -101,7 +101,7 @@ cmake:
script:
- ./autogen.sh
- mkdir builddir
- meson setup -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir
- meson setup -Denable-deep-plc=true -Denable-osce=true -Denable-dred=true -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir
- meson compile -C builddir
- meson test -C builddir
#- meson dist --no-tests -C builddir

View file

@ -87,6 +87,10 @@ set(OPUS_DRED_HELP_STR "enable DRED.")
option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF)
add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR})
set(OPUS_OSCE_HELP_STR "enable OSCE.")
option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF)
add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR})
if(APPLE)
set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.")
option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF)
@ -364,8 +368,6 @@ endif()
add_sources_group(opus silk ${silk_headers} ${silk_sources})
add_sources_group(opus celt ${celt_headers} ${celt_sources})
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
if(OPUS_FIXED_POINT)
add_sources_group(opus silk ${silk_sources_fixed})
@ -380,11 +382,26 @@ if(NOT OPUS_ENABLE_FLOAT_API)
target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
endif()
if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE)
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
set(OPUS_DNN TRUE)
else()
set(OPUS_DNN FALSE)
endif()
if (OPUS_DNN)
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
endif()
if (OPUS_DRED)
add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
target_compile_definitions(opus PRIVATE ENABLE_DRED)
if(NOT OPUS_DEEP_PLC)
target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
endif()
endif()
if (OPUS_OSCE)
add_sources_group(opus lpcnet ${osce_headers} ${osce_sources})
target_compile_definitions(opus PRIVATE ENABLE_OSCE)
endif()
if(NOT OPUS_DISABLE_INTRINSICS)
@ -405,7 +422,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
endif()
add_sources_group(opus celt ${celt_sources_x86_rtcd})
add_sources_group(opus silk ${silk_sources_x86_rtcd})
add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
endif()
endif()
if(SSE1_SUPPORTED)
@ -427,7 +446,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
if(SSE2_SUPPORTED)
if(OPUS_X86_MAY_HAVE_SSE2)
add_sources_group(opus celt ${celt_sources_sse2})
add_sources_group(opus lpcnet ${dnn_sources_sse2})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_sse2})
endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
if(NOT MSVC)
set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
@ -445,7 +466,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
if(OPUS_X86_MAY_HAVE_SSE4_1)
add_sources_group(opus celt ${celt_sources_sse4_1})
add_sources_group(opus silk ${silk_sources_sse4_1})
add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
if(NOT MSVC)
set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
@ -471,7 +494,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
add_sources_group(opus celt ${celt_sources_avx2})
add_sources_group(opus silk ${silk_sources_avx2})
add_sources_group(opus silk ${silk_sources_float_avx2})
add_sources_group(opus lpcnet ${dnn_sources_avx2})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_avx2})
endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2)
if(MSVC)
set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2")
@ -524,7 +549,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
add_sources_group(opus celt ${celt_sources_arm_neon_intr})
add_sources_group(opus silk ${silk_sources_arm_neon_intr})
add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
endif()
# silk arm neon depends on main_Fix.h
target_include_directories(opus PRIVATE silk/fixed)

View file

@ -25,6 +25,9 @@ endif
if ENABLE_DRED
LPCNET_SOURCES += $(DRED_SOURCES)
endif
if ENABLE_OSCE
LPCNET_SOURCES += $(OSCE_SOURCES)
endif
if FIXED_POINT
SILK_SOURCES += $(SILK_SOURCES_FIXED)
@ -132,6 +135,9 @@ endif
if ENABLE_DRED
LPCNET_HEAD += $(DRED_HEAD)
endif
if ENABLE_OSCE
LPCNET_HEAD += $(OSCE_HEAD)
endif
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES)
libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@

View file

@ -9,7 +9,7 @@ set -e
srcdir=`dirname $0`
test -n "$srcdir" && cd "$srcdir"
dnn/download_model.sh df63771
dnn/download_model.sh 591c8ba
echo "Updating build configuration files, please wait...."

View file

@ -42,8 +42,10 @@ get_opus_sources(CELT_SOURCES_ARM_NE10 celt_sources.mk celt_sources_arm_ne10)
get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers)
get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)
get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)
get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources)
get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)
get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)
get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd)
get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)
get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)

View file

@ -175,10 +175,10 @@ AC_ARG_ENABLE([deep-plc],
[AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,
[enable_deep_plc=no])
AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"],[
AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])
])
AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"])
AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
has_float_approx=no
case "$host_cpu" in
@ -904,6 +904,31 @@ AS_IF([test "$enable_dnn_debug_float" = "no"], [
AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])
])
dnl Fix: the action-if-not-given previously assigned the misspelled variable
dnl enable_osc_training_data, leaving enable_osce_training_data unset (rather
dnl than "no") whenever the option was omitted.
AC_ARG_ENABLE([osce-training-data],
    AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
    [enable_osce_training_data=no]
)
AS_IF([test "$enable_osce_training_data" = "yes"], [
AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
])
AC_MSG_CHECKING([argument osce training data])
AS_IF([test "$enable_osce_training_data" = "yes"], [
AC_MSG_RESULT([yes])
], [AC_MSG_RESULT([no])])
dnl Fix: help text was copy-pasted from --enable-osce-training-data; this
dnl option enables OSCE itself (matching the AC_DEFINE description below),
dnl not training-data output.
AC_ARG_ENABLE([osce],
    AS_HELP_STRING([--enable-osce], [enable Opus Speech Coding Enhancement (OSCE)]),,
    [enable_osce=no]
)
AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
])
AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])
AC_ARG_ENABLE([extra-programs],

449
dnn/adaconvtest.c Normal file
View file

@ -0,0 +1,449 @@
#include "lace_data.h"
#include "nolace_data.h"
#include "osce.h"
#include "nndsp.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern const WeightArray lacelayers_arrays[];
extern const WeightArray nolacelayers_arrays[];
/* Reads <prefix>_features.f32 and <prefix>_x_in.f32, runs adaconv_process_frame()
 * on num_frames frames, and prints the per-frame RMSE against the reference
 * output <prefix>_x_out.f32. Exits the process on any file error.
 *
 * Fixes vs. original: bounds-checked name construction (snprintf instead of
 * strcpy/strcat and sprintf), binary-mode fopen for raw float data, and the
 * three FILE handles are closed on the success path (previously leaked).
 */
void adaconv_compare(
    const char * prefix,
    int num_frames,
    AdaConvState* hAdaConv,
    LinearLayer *kernel_layer,
    LinearLayer *gain_layer,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int in_channels,
    int out_channels,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float shape_gain
)
{
    char feature_file[256];
    char x_in_file[256];
    char x_out_file[256];
    char message[512];
    int i_frame, i_sample;
    float mse;
    float features[512];
    float x_in[512];
    float x_out_ref[512];
    float x_out[512];
    float window[40];
    FILE *f_features, *f_x_in, *f_x_out;

    init_adaconv_state(hAdaConv);
    compute_overlap_window(window, 40);

    /* snprintf: never overruns the 256-byte name buffers. */
    snprintf(feature_file, sizeof(feature_file), "%s_features.f32", prefix);
    snprintf(x_in_file, sizeof(x_in_file), "%s_x_in.f32", prefix);
    snprintf(x_out_file, sizeof(x_out_file), "%s_x_out.f32", prefix);

    /* "rb": files hold raw floats; text mode would mangle them on platforms
       that translate line endings. */
    f_features = fopen(feature_file, "rb");
    if (f_features == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", feature_file);
        perror(message);
        exit(1);
    }
    f_x_in = fopen(x_in_file, "rb");
    if (f_x_in == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", x_in_file);
        perror(message);
        exit(1);
    }
    f_x_out = fopen(x_out_file, "rb");
    if (f_x_out == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", x_out_file);
        perror(message);
        exit(1);
    }

    for (i_frame = 0; i_frame < num_frames; i_frame ++)
    {
        /* A short read means the vector files hold fewer than num_frames
           frames, which is fatal for the comparison. */
        if (fread(features, sizeof(float), feature_dim, f_features) != (size_t)feature_dim)
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
            exit(1);
        }
        if (fread(x_in, sizeof(float), frame_size * in_channels, f_x_in) != (size_t)(frame_size * in_channels))
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
            exit(1);
        }
        if (fread(x_out_ref, sizeof(float), frame_size * out_channels, f_x_out) != (size_t)(frame_size * out_channels))
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
            exit(1);
        }
        adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim,
            frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding,
            filter_gain_a, filter_gain_b, shape_gain, window, 0);
        mse = 0;
        for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++)
        {
            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
        }
        mse = sqrt(mse / (frame_size * out_channels));
        printf("rmse[%d] %f\n", i_frame, mse);
    }

    /* Previously leaked: release all input handles. */
    fclose(f_features);
    fclose(f_x_in);
    fclose(f_x_out);
}
/* Reads <prefix>_features.f32, <prefix>_x_in.f32 and the int32 pitch lags in
 * <prefix>_p_in.s32, runs adacomb_process_frame() on num_frames frames, and
 * prints the per-frame RMSE against the reference output <prefix>_x_out.f32.
 * Exits the process on any file error.
 *
 * Fixes vs. original: bounds-checked name construction (snprintf instead of
 * strcpy/strcat and sprintf), binary-mode fopen for raw sample data, and the
 * four FILE handles are closed on the success path (previously leaked).
 */
void adacomb_compare(
    const char * prefix,
    int num_frames,
    AdaCombState* hAdaComb,
    LinearLayer *kernel_layer,
    LinearLayer *gain_layer,
    LinearLayer *global_gain_layer,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float log_gain_limit
)
{
    char feature_file[256];
    char x_in_file[256];
    char p_in_file[256];
    char x_out_file[256];
    char message[512];
    int i_frame, i_sample;
    float mse;
    float features[512];
    float x_in[512];
    float x_out_ref[512];
    float x_out[512];
    int pitch_lag;
    float window[40];
    FILE *f_features, *f_x_in, *f_p_in, *f_x_out;

    init_adacomb_state(hAdaComb);
    compute_overlap_window(window, 40);

    /* snprintf: never overruns the 256-byte name buffers. */
    snprintf(feature_file, sizeof(feature_file), "%s_features.f32", prefix);
    snprintf(x_in_file, sizeof(x_in_file), "%s_x_in.f32", prefix);
    snprintf(p_in_file, sizeof(p_in_file), "%s_p_in.s32", prefix);
    snprintf(x_out_file, sizeof(x_out_file), "%s_x_out.f32", prefix);

    /* "rb": files hold raw binary data; text mode would mangle them on
       platforms that translate line endings. */
    f_features = fopen(feature_file, "rb");
    if (f_features == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", feature_file);
        perror(message);
        exit(1);
    }
    f_x_in = fopen(x_in_file, "rb");
    if (f_x_in == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", x_in_file);
        perror(message);
        exit(1);
    }
    f_p_in = fopen(p_in_file, "rb");
    if (f_p_in == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", p_in_file);
        perror(message);
        exit(1);
    }
    f_x_out = fopen(x_out_file, "rb");
    if (f_x_out == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", x_out_file);
        perror(message);
        exit(1);
    }

    for (i_frame = 0; i_frame < num_frames; i_frame ++)
    {
        if (fread(features, sizeof(float), feature_dim, f_features) != (size_t)feature_dim)
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
            exit(1);
        }
        if (fread(x_in, sizeof(float), frame_size, f_x_in) != (size_t)frame_size)
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
            exit(1);
        }
        /* One pitch lag (int32) per frame. */
        if (fread(&pitch_lag, sizeof(int), 1, f_p_in) != 1)
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, p_in_file);
            exit(1);
        }
        if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != (size_t)frame_size)
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
            exit(1);
        }
        adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer,
            pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0);
        mse = 0;
        for (i_sample = 0; i_sample < frame_size; i_sample ++)
        {
            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
        }
        mse = sqrt(mse / (frame_size));
        printf("rmse[%d] %f\n", i_frame, mse);
    }

    /* Previously leaked: release all input handles. */
    fclose(f_features);
    fclose(f_x_in);
    fclose(f_p_in);
    fclose(f_x_out);
}
/* Reads <prefix>_features.f32 and <prefix>_x_in.f32, runs
 * adashape_process_frame() on num_frames frames, and prints the per-frame
 * RMSE against the reference output <prefix>_x_out.f32. Exits the process on
 * any file error.
 *
 * Fixes vs. original: bounds-checked name construction (snprintf instead of
 * strcpy/strcat and sprintf), binary-mode fopen for raw float data, and the
 * three FILE handles are closed on the success path (previously leaked).
 */
void adashape_compare(
    const char * prefix,
    int num_frames,
    AdaShapeState* hAdaShape,
    LinearLayer *alpha1,
    LinearLayer *alpha2,
    int feature_dim,
    int frame_size,
    int avg_pool_k
)
{
    char feature_file[256];
    char x_in_file[256];
    char x_out_file[256];
    char message[512];
    int i_frame, i_sample;
    float mse;
    float features[512];
    float x_in[512];
    float x_out_ref[512];
    float x_out[512];
    FILE *f_features, *f_x_in, *f_x_out;

    init_adashape_state(hAdaShape);

    /* snprintf: never overruns the 256-byte name buffers. */
    snprintf(feature_file, sizeof(feature_file), "%s_features.f32", prefix);
    snprintf(x_in_file, sizeof(x_in_file), "%s_x_in.f32", prefix);
    snprintf(x_out_file, sizeof(x_out_file), "%s_x_out.f32", prefix);

    /* "rb": files hold raw floats; text mode would mangle them on platforms
       that translate line endings. */
    f_features = fopen(feature_file, "rb");
    if (f_features == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", feature_file);
        perror(message);
        exit(1);
    }
    f_x_in = fopen(x_in_file, "rb");
    if (f_x_in == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", x_in_file);
        perror(message);
        exit(1);
    }
    f_x_out = fopen(x_out_file, "rb");
    if (f_x_out == NULL)
    {
        snprintf(message, sizeof(message), "could not open file %s", x_out_file);
        perror(message);
        exit(1);
    }

    for (i_frame = 0; i_frame < num_frames; i_frame ++)
    {
        if (fread(features, sizeof(float), feature_dim, f_features) != (size_t)feature_dim)
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
            exit(1);
        }
        if (fread(x_in, sizeof(float), frame_size, f_x_in) != (size_t)frame_size)
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
            exit(1);
        }
        if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != (size_t)frame_size)
        {
            fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
            exit(1);
        }
        adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim,
            frame_size, avg_pool_k, 0);
        mse = 0;
        for (i_sample = 0; i_sample < frame_size; i_sample ++)
        {
            mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
        }
        mse = sqrt(mse / (frame_size));
        printf("rmse[%d] %f\n", i_frame, mse);
    }

    /* Previously leaked: release all input handles. */
    fclose(f_features);
    fclose(f_x_in);
    fclose(f_x_out);
}
/* Test driver: loads the LACE and NoLACE weight tables and compares the C
 * implementations of AdaConv, AdaComb and AdaShape against reference test
 * vectors under testvectors/ (5 frames each), printing per-frame RMSE.
 */
int main()
{
LACELayers hLACE;
NOLACELayers hNoLACE;
AdaConvState hAdaConv;
AdaCombState hAdaComb;
AdaShapeState hAdaShape;
/* NOTE(review): this init is redundant — adaconv_compare() re-initializes
   the state it receives, which also makes reusing hAdaConv across the four
   AdaConv tests below safe. */
init_adaconv_state(&hAdaConv);
init_lacelayers(&hLACE, lacelayers_arrays);
init_nolacelayers(&hNoLACE, nolacelayers_arrays);
/* AdaConv: all four in/out channel combinations exercised by LACE/NoLACE. */
printf("\ntesting lace.af1 (1 in, 1 out)...\n");
adaconv_compare(
"testvectors/lace_af1",
5,
&hAdaConv,
&hLACE.lace_af1_kernel,
&hLACE.lace_af1_gain,
LACE_AF1_FEATURE_DIM,
LACE_AF1_FRAME_SIZE,
LACE_AF1_OVERLAP_SIZE,
LACE_AF1_IN_CHANNELS,
LACE_AF1_OUT_CHANNELS,
LACE_AF1_KERNEL_SIZE,
LACE_AF1_LEFT_PADDING,
LACE_AF1_FILTER_GAIN_A,
LACE_AF1_FILTER_GAIN_B,
LACE_AF1_SHAPE_GAIN
);
printf("\ntesting nolace.af1 (1 in, 2 out)...\n");
adaconv_compare(
"testvectors/nolace_af1",
5,
&hAdaConv,
&hNoLACE.nolace_af1_kernel,
&hNoLACE.nolace_af1_gain,
NOLACE_AF1_FEATURE_DIM,
NOLACE_AF1_FRAME_SIZE,
NOLACE_AF1_OVERLAP_SIZE,
NOLACE_AF1_IN_CHANNELS,
NOLACE_AF1_OUT_CHANNELS,
NOLACE_AF1_KERNEL_SIZE,
NOLACE_AF1_LEFT_PADDING,
NOLACE_AF1_FILTER_GAIN_A,
NOLACE_AF1_FILTER_GAIN_B,
NOLACE_AF1_SHAPE_GAIN
);
printf("testing nolace.af4 (2 in, 1 out)...\n");
adaconv_compare(
"testvectors/nolace_af4",
5,
&hAdaConv,
&hNoLACE.nolace_af4_kernel,
&hNoLACE.nolace_af4_gain,
NOLACE_AF4_FEATURE_DIM,
NOLACE_AF4_FRAME_SIZE,
NOLACE_AF4_OVERLAP_SIZE,
NOLACE_AF4_IN_CHANNELS,
NOLACE_AF4_OUT_CHANNELS,
NOLACE_AF4_KERNEL_SIZE,
NOLACE_AF4_LEFT_PADDING,
NOLACE_AF4_FILTER_GAIN_A,
NOLACE_AF4_FILTER_GAIN_B,
NOLACE_AF4_SHAPE_GAIN
);
printf("\ntesting nolace.af2 (2 in, 2 out)...\n");
adaconv_compare(
"testvectors/nolace_af2",
5,
&hAdaConv,
&hNoLACE.nolace_af2_kernel,
&hNoLACE.nolace_af2_gain,
NOLACE_AF2_FEATURE_DIM,
NOLACE_AF2_FRAME_SIZE,
NOLACE_AF2_OVERLAP_SIZE,
NOLACE_AF2_IN_CHANNELS,
NOLACE_AF2_OUT_CHANNELS,
NOLACE_AF2_KERNEL_SIZE,
NOLACE_AF2_LEFT_PADDING,
NOLACE_AF2_FILTER_GAIN_A,
NOLACE_AF2_FILTER_GAIN_B,
NOLACE_AF2_SHAPE_GAIN
);
/* AdaComb: pitch-driven comb filter (LACE only has cf1 under test here). */
printf("\ntesting lace.cf1...\n");
adacomb_compare(
"testvectors/lace_cf1",
5,
&hAdaComb,
&hLACE.lace_cf1_kernel,
&hLACE.lace_cf1_gain,
&hLACE.lace_cf1_global_gain,
LACE_CF1_FEATURE_DIM,
LACE_CF1_FRAME_SIZE,
LACE_CF1_OVERLAP_SIZE,
LACE_CF1_KERNEL_SIZE,
LACE_CF1_LEFT_PADDING,
LACE_CF1_FILTER_GAIN_A,
LACE_CF1_FILTER_GAIN_B,
LACE_CF1_LOG_GAIN_LIMIT
);
/* AdaShape: temporal-envelope shaping (NoLACE only). */
printf("\ntesting nolace.tdshape1...\n");
adashape_compare(
"testvectors/nolace_tdshape1",
5,
&hAdaShape,
&hNoLACE.nolace_tdshape1_alpha1,
&hNoLACE.nolace_tdshape1_alpha2,
NOLACE_TDSHAPE1_FEATURE_DIM,
NOLACE_TDSHAPE1_FRAME_SIZE,
NOLACE_TDSHAPE1_AVG_POOL_K
);
return 0;
}
/* gcc -DVAR_ARRAYS -DENABLE_OSCE -I ../include -I ../silk -I . -I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */

View file

@ -5,6 +5,11 @@ if opt_enable_dred
dnn_sources += dred_sources
endif
osce_sources = sources['OSCE_SOURCES']
if opt_enable_osce
dnn_sources += osce_sources
endif
dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']
dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']
dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']

412
dnn/nndsp.c Normal file
View file

@ -0,0 +1,412 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "nndsp.h"
#include "arch.h"
#include "nnet.h"
#include "os_support.h"
#include "pitch.h"
#include <math.h>
#ifndef M_PI
#define M_PI 3.141592653589793f
#endif
#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
/* Zero-initializes the whole AdaConvState (input history, last kernel)
   so the first processed frame sees silent history. */
void init_adaconv_state(AdaConvState *hAdaConv)
{
OPUS_CLEAR(hAdaConv, 1);
}
/* Zero-initializes the whole AdaCombState (history, last kernel,
   last pitch lag and last global gain). */
void init_adacomb_state(AdaCombState *hAdaComb)
{
OPUS_CLEAR(hAdaComb, 1);
}
/* Zero-initializes the whole AdaShapeState (the two conv1d layer states). */
void init_adashape_state(AdaShapeState *hAdaShape)
{
OPUS_CLEAR(hAdaShape, 1);
}
/* Fills window[0..overlap_size-1] with the raised-cosine cross-fade ramp
 * window[i] = 0.5 + 0.5*cos(pi*(i + 0.5)/overlap_size).
 * The values decrease monotonically from just under 1 to just above 0, and
 * window[i] + window[overlap_size-1-i] == 1, so a fade-out/fade-in pair
 * formed from the same table sums to unity.
 */
void compute_overlap_window(float *window, int overlap_size)
{
    int idx = 0;
    while (idx < overlap_size)
    {
        double phase = M_PI * (idx + 0.5f) / overlap_size;
        window[idx] = 0.5f + 0.5f * cos(phase);
        idx++;
    }
}
#ifdef DEBUG_NNDSP
/* Debug helper: prints every element of vec as "<name>[<index>]: <value>",
   one per line, to stdout. */
void print_float_vector(const char* name, const float *vec, int length)
{
    int idx = 0;
    while (idx < length)
    {
        printf("%s[%d]: %f\n", name, idx, vec[idx]);
        idx++;
    }
}
#endif
/* Normalizes each output channel's kernel slice to unit L2 norm (over the
 * input-channel and tap dimensions) and then scales it by that channel's
 * gain. Operates in place on `kernel`, laid out per the KERNEL_INDEX macro:
 * [out_channel][in_channel][tap]. Note KERNEL_INDEX captures the local
 * variables in_channels and kernel_size from this scope, so the parameter
 * names must not change.
 */
static void scale_kernel(
float *kernel,
int in_channels,
int out_channels,
int kernel_size,
float *gain
)
/* normalizes (L2 norm) kernel over input channel and kernel dimension */
{
float norm;
int i_in_channels, i_out_channels, i_kernel;
for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
{
/* Sum of squares over all taps feeding this output channel. */
norm = 0;
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels ++)
{
for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
{
norm += kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] * kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
}
}
#ifdef DEBUG_NNDSP
printf("kernel norm: %f, %f\n", norm, sqrt(norm));
#endif
/* 1e-6 guards against division by zero for an all-zero kernel. */
norm = 1.f / (1e-6f + sqrt(norm));
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
{
for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
{
kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] *= norm * gain[i_out_channels];
}
}
}
}
static void transform_gains(
float *gains,
int num_gains,
float filter_gain_a,
float filter_gain_b
)
{
int i;
for (i = 0; i < num_gains; i++)
{
gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b);
}
}
/* Adaptive convolution: filters one frame with a data-dependent FIR kernel.
 * The kernel and a per-output-channel gain are predicted from `features` by
 * the two dense layers; the kernel is L2-normalized per output channel and
 * scaled by the (exp-transformed) gain. The first `overlap_size` output
 * samples cross-fade (via `window`) between the previous frame's kernel and
 * the new one to avoid switching artifacts. hAdaConv carries the per-channel
 * input history (kernel_size samples) and the previous kernel between calls.
 * `arch` selects the architecture-specific DNN kernels; the filtering itself
 * runs through celt_pitch_xcorr.
 */
void adaconv_process_frame(
AdaConvState* hAdaConv,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *kernel_layer,
const LinearLayer *gain_layer,
int feature_dim,
int frame_size,
int overlap_size,
int in_channels,
int out_channels,
int kernel_size,
int left_padding,
float filter_gain_a,
float filter_gain_b,
float shape_gain,
float *window,
int arch
)
{
float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
float kernel0[ADACONV_MAX_KERNEL_SIZE];
float kernel1[ADACONV_MAX_KERNEL_SIZE];
float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
float *p_input;
int i_in_channels, i_out_channels, i_sample;
(void) feature_dim; /* ToDo: figure out whether we might need this information */
celt_assert(shape_gain == 1);
celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
celt_assert(kernel_size < frame_size);
OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));
#ifdef DEBUG_NNDSP
print_float_vector("x_in", x_in, in_channels * frame_size);
#endif
/* prepare input: per channel, [kernel_size samples of history | frame_size new samples] */
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
{
OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
}
p_input = input_buffer + kernel_size;
/* calculate new kernel and new gain */
compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
print_float_vector("features", features, feature_dim);
print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
#endif
transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);
#ifdef DEBUG_NNDSP
print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
print_float_vector("adaconv_gain", gain_buffer, out_channels);
#endif
/* calculate overlapping part using kernel from last frame */
for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
{
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
{
/* kernel0 = previous frame's taps, kernel1 = new taps, both zero-padded
   to the max length so the fixed-length xcorr can be used. */
OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);
OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
/* Cross-fade old -> new kernel over the first overlap_size samples. */
for (i_sample = 0; i_sample < overlap_size; i_sample++)
{
output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample];
output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
}
/* Remainder of the frame uses the new kernel only. */
for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
{
output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
}
}
}
OPUS_COPY(x_out, output_buffer, out_channels * frame_size);
#ifdef DEBUG_NNDSP
print_float_vector("x_out", x_out, out_channels * frame_size);
#endif
/* buffer update: keep the last kernel_size input samples and the new kernel
   for next frame's cross-fade. */
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
{
OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
}
OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
}
/* Adaptive comb filter: adds a gain-scaled, kernel-filtered copy of the
 * signal delayed by pitch_lag to the input, then applies a global gain.
 * Kernel, comb gain and global gain are predicted from `features` by three
 * dense layers. The first `overlap_size` samples cross-fade (via `window`)
 * between last frame's kernel/lag/global gain and the new ones. hAdaComb
 * carries history (kernel_size + ADACOMB_MAX_LAG samples), the previous
 * kernel, pitch lag and global gain between calls.
 */
void adacomb_process_frame(
AdaCombState* hAdaComb,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *kernel_layer,
const LinearLayer *gain_layer,
const LinearLayer *global_gain_layer,
int pitch_lag,
int feature_dim,
int frame_size,
int overlap_size,
int kernel_size,
int left_padding,
float filter_gain_a,
float filter_gain_b,
float log_gain_limit,
float *window,
int arch
)
{
float output_buffer[ADACOMB_MAX_FRAME_SIZE];
float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
float gain, global_gain;
float *p_input;
int i_sample;
/* NOTE(review): kernel/last_kernel are declared with a hard-coded length of
   16 but cleared below with OPUS_CLEAR(..., ADACOMB_MAX_KERNEL_SIZE); confirm
   ADACOMB_MAX_KERNEL_SIZE <= 16 (declared in nndsp.h), otherwise these
   OPUS_CLEAR calls overrun the stack buffers. */
float kernel[16];
float last_kernel[16];
(void) feature_dim; /* ToDo: figure out whether we might need this information */
OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);
/* Layout: [kernel_size + ADACOMB_MAX_LAG history | frame_size new samples],
   so p_input[-lag] reaches back up to the maximum pitch lag. */
OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;
/* calculate new kernel and new gain */
compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
print_float_vector("features", features, feature_dim);
print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
print_float_vector("adacomb_gain_raw", &gain, 1);
print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
#endif
/* ReLU output >= 0, so exp(log_gain_limit - gain) caps the comb gain at
   exp(log_gain_limit). */
gain = exp(log_gain_limit - gain);
global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);
#ifdef DEBUG_NNDSP
print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
print_float_vector("adacomb_gain", &gain, 1);
#endif
OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
OPUS_COPY(kernel, kernel_buffer, kernel_size);
OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);
/* Comb contribution: filter the signal delayed by last/new pitch lag with
   last/new kernel respectively. */
celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
/* Cross-fade the comb contributions (each weighted by its global gain). */
for (i_sample = 0; i_sample < overlap_size; i_sample++)
{
output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
}
/* Add the (gain-cross-faded) dry signal in the overlap region... */
for (i_sample = 0; i_sample < overlap_size; i_sample++)
{
output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
}
/* ...and with the new global gain only for the rest of the frame. */
for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
{
output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
}
OPUS_COPY(x_out, output_buffer, frame_size);
#ifdef DEBUG_NNDSP
print_float_vector("x_out", x_out, frame_size);
#endif
/* buffer update */
OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
hAdaComb->last_pitch_lag = pitch_lag;
hAdaComb->last_global_gain = global_gain;
}
/* Adaptive temporal shaping: multiplies the input signal by a per-sample
 * gain curve exp(alpha2(leaky_relu(alpha1([features | temporal envelope])))).
 * The temporal envelope is the log of the average-pooled (window avg_pool_k)
 * absolute signal, mean-removed, with the mean appended as an extra element.
 * hAdaShape carries the two conv1d layer states between calls.
 */
void adashape_process_frame(
AdaShapeState *hAdaShape,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *alpha1,
const LinearLayer *alpha2,
int feature_dim,
int frame_size,
int avg_pool_k,
int arch
)
{
float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
int i, k;
int tenv_size;
float mean;
float *tenv;
celt_assert(frame_size % avg_pool_k == 0);
celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);
tenv_size = frame_size / avg_pool_k;
/* in_buffer = [features | tenv | mean]; tenv aliases the tail. */
tenv = in_buffer + feature_dim;
OPUS_CLEAR(tenv, tenv_size + 1);
OPUS_COPY(in_buffer, features, feature_dim);
/* calculate temporal envelope */
mean = 0;
for (i = 0; i < tenv_size; i++)
{
for (k = 0; k < avg_pool_k; k++)
{
tenv[i] += fabs(x_in[i * avg_pool_k + k]);
}
/* 1.52587890625e-05 = 2^-16: floor to keep the log finite on silence. */
tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
mean += tenv[i];
}
mean /= tenv_size;
for (i = 0; i < tenv_size; i++)
{
tenv[i] -= mean;
}
tenv[tenv_size] = mean;
#ifdef DEBUG_NNDSP
print_float_vector("tenv", tenv, tenv_size + 1);
#endif
/* calculate temporal weights */
#ifdef DEBUG_NNDSP
print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
#endif
compute_generic_conv1d(alpha1, out_buffer, hAdaShape->conv_alpha1_state, in_buffer, feature_dim + tenv_size + 1, ACTIVATION_LINEAR, arch);
#ifdef DEBUG_NNDSP
print_float_vector("alpha1_out", out_buffer, frame_size);
#endif
/* compute leaky ReLU by hand. ToDo: try tanh activation */
for (i = 0; i < frame_size; i ++)
{
in_buffer[i] = out_buffer[i] >= 0 ? out_buffer[i] : 0.2f * out_buffer[i];
}
#ifdef DEBUG_NNDSP
print_float_vector("post_alpha1", in_buffer, frame_size);
#endif
compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);
/* shape signal: exp keeps the per-sample gain strictly positive. */
for (i = 0; i < frame_size; i ++)
{
x_out[i] = exp(out_buffer[i]) * x_in[i];
}
}

141
dnn/nndsp.h Normal file
View file

@ -0,0 +1,141 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NNDSP_H
#define NNDSP_H
#include "opus_types.h"
#include "nnet.h"
#include <string.h>
/* Capacity limits for the adaptive convolution (AdaConv) buffers. */
#define ADACONV_MAX_KERNEL_SIZE 16
#define ADACONV_MAX_INPUT_CHANNELS 2
#define ADACONV_MAX_OUTPUT_CHANNELS 2
#define ADACONV_MAX_FRAME_SIZE 80
#define ADACONV_MAX_OVERLAP_SIZE 40
/* Capacity limits for the adaptive comb filter (AdaComb) buffers. */
#define ADACOMB_MAX_LAG 300
#define ADACOMB_MAX_KERNEL_SIZE 16
#define ADACOMB_MAX_FRAME_SIZE 80
#define ADACOMB_MAX_OVERLAP_SIZE 40
/* Capacity limits for the adaptive temporal shaping (AdaShape) buffers. */
#define ADASHAPE_MAX_INPUT_DIM 512
#define ADASHAPE_MAX_FRAME_SIZE 160
/* Uncomment to dump intermediate signals of the DSP modules to stdout. */
/*#define DEBUG_NNDSP*/
#ifdef DEBUG_NNDSP
#include <stdio.h>
#endif
/* Debug helper: prints a labelled float vector (used under DEBUG_NNDSP). */
void print_float_vector(const char* name, const float *vec, int length);
/* AdaConv state: input history plus last frame's kernel and gain
   (presumably for cross-fading between frames, as in AdaComb — confirm in nndsp.c). */
typedef struct {
    float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS];
    float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
    float last_gain;
} AdaConvState;
/* AdaComb state: signal history covering max lag + kernel length, and the
   previous frame's kernel/gain/lag used for windowed cross-fading. */
typedef struct {
    float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG];
    float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
    float last_global_gain;
    int last_pitch_lag;
} AdaCombState;
/* AdaShape state: states of the two conv1d layers (alpha1, alpha2). */
typedef struct {
    float conv_alpha1_state[ADASHAPE_MAX_INPUT_DIM];
    float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE];
} AdaShapeState;
/* Initialize (reset) the respective module states. */
void init_adaconv_state(AdaConvState *hAdaConv);
void init_adacomb_state(AdaCombState *hAdaComb);
void init_adashape_state(AdaShapeState *hAdaShape);
/* Fills window with the overlap window used for cross-fading between frames. */
void compute_overlap_window(float *window, int overlap_size);
/* Applies a feature-adaptive convolution to one frame: kernel and gain are
   predicted per frame from features by kernel_layer/gain_layer — see nndsp.c. */
void adaconv_process_frame(
    AdaConvState* hAdaConv,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    int feature_dim, /* not strictly necessary */
    int frame_size,
    int overlap_size,
    int in_channels,
    int out_channels,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float shape_gain,
    float *window,
    int arch
);
/* Applies a feature-adaptive comb filter at the given pitch lag; the comb
   output and the previous frame's filter are cross-faded over overlap_size
   samples using window, then scaled by a predicted global gain. */
void adacomb_process_frame(
    AdaCombState* hAdaComb,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    const LinearLayer *global_gain_layer,
    int pitch_lag,       /* current pitch lag in samples */
    int feature_dim,
    int frame_size,
    int overlap_size,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float log_gain_limit,
    float *window,       /* cross-fade window (overlap_size samples) */
    int arch
);
/* Applies feature-adaptive temporal shaping: x_out[n] = exp(g[n]) * x_in[n]
   with the log-gain g predicted by the alpha1/alpha2 conv layers. */
void adashape_process_frame(
    AdaShapeState *hAdaShape,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *alpha1,
    const LinearLayer *alpha2,
    int feature_dim,
    int frame_size,
    int avg_pool_k,      /* pooling factor for the temporal envelope */
    int arch
);
#endif

View file

@ -41,6 +41,10 @@
#include "os_support.h"
#include "vec.h"
#ifdef ENABLE_OSCE
#include "osce_config.h"
#endif
#ifdef NO_OPTIMIZATIONS
#if defined(_MSC_VER)
#pragma message ("Compiling without any vectorization. This code will be very slow")
@ -59,8 +63,11 @@ void compute_generic_dense(const LinearLayer *layer, float *output, const float
compute_activation(output, output, layer->nb_outputs, activation, arch);
}
#ifdef ENABLE_OSCE
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
#else
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS)
#endif
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
{

View file

@ -64,13 +64,29 @@ static OPUS_INLINE float relu(float x)
return x < 0 ? 0 : x;
}
/*#define HIGH_ACCURACY */
void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)
{
int i;
if (activation == ACTIVATION_SIGMOID) {
#ifdef HIGH_ACCURACY
for (int n=0; n<N; n++)
{
output[n] = 1.f / (1 + exp(-input[n]));
}
#else
vec_sigmoid(output, input, N);
#endif
} else if (activation == ACTIVATION_TANH) {
#ifdef HIGH_ACCURACY
for (int n=0; n<N; n++)
{
output[n] = tanh(input[n]);
}
#else
vec_tanh(output, input, N);
#endif
} else if (activation == ACTIVATION_SWISH) {
vec_swish(output, input, N);
} else if (activation == ACTIVATION_RELU) {

1411
dnn/osce.c Normal file

File diff suppressed because it is too large Load diff

81
dnn/osce.h Normal file
View file

@ -0,0 +1,81 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OSCE_H
#define OSCE_H
#include "opus_types.h"
/*#include "osce_config.h"*/
#ifndef DISABLE_LACE
#include "lace_data.h"
#endif
#ifndef DISABLE_NOLACE
#include "nolace_data.h"
#endif
#include "nndsp.h"
#include "nnet.h"
#include "osce_structs.h"
#include "structs.h"
/* Enhancement method identifiers (which OSCE model to run). */
#define OSCE_METHOD_NONE 0
#ifndef DISABLE_LACE
#define OSCE_METHOD_LACE 1
#endif
#ifndef DISABLE_NOLACE
#define OSCE_METHOD_NOLACE 2
#endif
/* Default method: NoLACE if compiled in, otherwise LACE, otherwise none. */
#if !defined(DISABLE_NOLACE)
#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE
#elif !defined(DISABLE_LACE)
#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE
#else
#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE
#endif
/* API */
/* Enhances one decoded SILK frame in place (xq is both input and output). */
void osce_enhance_frame(
    OSCEModel *model, /* I OSCE model struct */
    silk_decoder_state *psDec, /* I/O Decoder state */
    silk_decoder_control *psDecCtrl, /* I Decoder control */
    opus_int16 xq[], /* I/O Decoded speech */
    opus_int32 num_bits, /* I Size of SILK payload in bits */
    int arch /* I Run-time architecture */
);
/* Loads model weights from a binary blob; returns 0 on success — NOTE(review): confirm convention in osce.c. */
int osce_load_models(OSCEModel *hModel, const unsigned char *data, int len);
/* Resets the per-decoder OSCE state for the given method. */
void osce_reset(silk_OSCE_struct *hOSCE, int method);
#endif

62
dnn/osce_config.h Normal file
View file

@ -0,0 +1,62 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OSCE_CONFIG
#define OSCE_CONFIG
/* Upper bound used for GRU state allocation (see MAX_RNN_NEURONS_ALL in nnet.c). */
#define OSCE_MAX_RNN_NEURONS 256
/* Number of past decoded samples kept for windowed feature analysis. */
#define OSCE_FEATURES_MAX_HISTORY 350
/* Per-subframe feature vector size: 64 + 18 + 5 + 5 + 1 (see layout below). */
#define OSCE_FEATURE_DIM 93
#define OSCE_MAX_FEATURE_FRAMES 4
#define OSCE_CLEAN_SPEC_NUM_BANDS 64
#define OSCE_NOISY_SPEC_NUM_BANDS 18
/* Substitute lag value for unvoiced frames (see pitch_postprocessing). */
#define OSCE_NO_PITCH_VALUE 7
/* Pre-emphasis coefficient. */
#define OSCE_PREEMPH 0.85f
/* Max number of unvoiced frames over which a voiced lag is held. */
#define OSCE_PITCH_HANGOVER 8
/* Feature vector layout: [start, start + length) offsets into one frame. */
#define OSCE_CLEAN_SPEC_START 0
#define OSCE_CLEAN_SPEC_LENGTH 64
#define OSCE_NOISY_CEPSTRUM_START 64
#define OSCE_NOISY_CEPSTRUM_LENGTH 18
#define OSCE_ACORR_START 82
#define OSCE_ACORR_LENGTH 5
#define OSCE_LTP_START 87
#define OSCE_LTP_LENGTH 5
#define OSCE_LOG_GAIN_START 92
#define OSCE_LOG_GAIN_LENGTH 1
#endif

454
dnn/osce_features.c Normal file
View file

@ -0,0 +1,454 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* 320-sample analysis window; 161 = 320/2 + 1 one-sided DFT bins */
#define OSCE_SPEC_WINDOW_SIZE 320
#define OSCE_SPEC_NUM_FREQS 161
/*DEBUG*/
/*#define WRITE_FEATURES*/
/*#define DEBUG_PRINT*/
/*******/
#include "stack_alloc.h"
#include "osce_features.h"
#include "kiss_fft.h"
#include "os_support.h"
#include "osce.h"
#include "freq.h"
/* bugfix: the guard previously tested the misspelled DEBUG_PRING, so enabling
   the DEBUG_PRINT blocks below would not have pulled in stdio */
#if defined(WRITE_FEATURES) || defined(DEBUG_PRINT)
#include <stdio.h>
#include <stdlib.h>
#endif
/* Triangular filterbank layout over the 161 one-sided DFT bins.
   center_bins_* hold the band-center bin indices; band_weights_* the per-band
   normalization (NOTE(review): values appear to be ~2 / span between the
   neighboring centers — confirm against the training code). */
static const int center_bins_clean[64] = {
0, 2, 5, 8, 10, 12, 15, 18,
20, 22, 25, 28, 30, 33, 35, 38,
40, 42, 45, 48, 50, 52, 55, 58,
60, 62, 65, 68, 70, 73, 75, 78,
80, 82, 85, 88, 90, 92, 95, 98,
100, 102, 105, 108, 110, 112, 115, 118,
120, 122, 125, 128, 130, 132, 135, 138,
140, 142, 145, 148, 150, 152, 155, 160
};
/* Coarser band layout for the noisy-signal cepstrum (18 bands). */
static const int center_bins_noisy[18] = {
0, 4, 8, 12, 16, 20, 24, 28,
32, 40, 48, 56, 64, 80, 96, 112,
136, 160
};
static const float band_weights_clean[64] = {
0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f
};
static const float band_weights_noisy[18] = {
0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f,
0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f,
0.041666666667f, 0.080000000000f
};
/* 320-point analysis/cross-fade window. Values match sin((n + 0.5) * pi / 320),
   so w[n]^2 + w[319 - n]^2 == 1 (constant-power cross-fade). */
static float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f,
0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f,
0.102901041421f, 0.112661287575f, 0.122410675199f, 0.132148264628f, 0.141873117332f,
0.151584296010f, 0.161280864678f, 0.170961888760f, 0.180626435180f, 0.190273572448f,
0.199902370753f, 0.209511902052f, 0.219101240157f, 0.228669460829f, 0.238215641862f,
0.247738863176f, 0.257238206902f, 0.266712757475f, 0.276161601717f, 0.285583828929f,
0.294978530977f, 0.304344802381f, 0.313681740399f, 0.322988445118f, 0.332264019538f,
0.341507569661f, 0.350718204573f, 0.359895036535f, 0.369037181064f, 0.378143757022f,
0.387213886697f, 0.396246695891f, 0.405241314005f, 0.414196874117f, 0.423112513073f,
0.431987371563f, 0.440820594212f, 0.449611329655f, 0.458358730621f, 0.467061954019f,
0.475720161014f, 0.484332517110f, 0.492898192230f, 0.501416360796f, 0.509886201809f,
0.518306898929f, 0.526677640552f, 0.534997619887f, 0.543266035038f, 0.551482089078f,
0.559644990127f, 0.567753951426f, 0.575808191418f, 0.583806933818f, 0.591749407690f,
0.599634847523f, 0.607462493302f, 0.615231590581f, 0.622941390558f, 0.630591150148f,
0.638180132051f, 0.645707604824f, 0.653172842954f, 0.660575126926f, 0.667913743292f,
0.675187984742f, 0.682397150168f, 0.689540544737f, 0.696617479953f, 0.703627273726f,
0.710569250438f, 0.717442741007f, 0.724247082951f, 0.730981620454f, 0.737645704427f,
0.744238692572f, 0.750759949443f, 0.757208846506f, 0.763584762206f, 0.769887082016f,
0.776115198508f, 0.782268511401f, 0.788346427627f, 0.794348361383f, 0.800273734191f,
0.806121974951f, 0.811892519997f, 0.817584813152f, 0.823198305781f, 0.828732456844f,
0.834186732948f, 0.839560608398f, 0.844853565250f, 0.850065093356f, 0.855194690420f,
0.860241862039f, 0.865206121757f, 0.870086991109f, 0.874883999665f, 0.879596685080f,
0.884224593137f, 0.888767277786f, 0.893224301196f, 0.897595233788f, 0.901879654283f,
0.906077149740f, 0.910187315596f, 0.914209755704f, 0.918144082372f, 0.921989916403f,
0.925746887127f, 0.929414632439f, 0.932992798835f, 0.936481041442f, 0.939879024058f,
0.943186419177f, 0.946402908026f, 0.949528180593f, 0.952561935658f, 0.955503880820f,
0.958353732530f, 0.961111216112f, 0.963776065795f, 0.966348024735f, 0.968826845041f,
0.971212287799f, 0.973504123096f, 0.975702130039f, 0.977806096779f, 0.979815820533f,
0.981731107599f, 0.983551773378f, 0.985277642389f, 0.986908548290f, 0.988444333892f,
0.989884851171f, 0.991229961288f, 0.992479534599f, 0.993633450666f, 0.994691598273f,
0.995653875433f, 0.996520189401f, 0.997290456679f, 0.997964603026f, 0.998542563469f,
0.999024282300f, 0.999409713092f, 0.999698818696f, 0.999891571247f, 0.999987952167f,
0.999987952167f, 0.999891571247f, 0.999698818696f, 0.999409713092f, 0.999024282300f,
0.998542563469f, 0.997964603026f, 0.997290456679f, 0.996520189401f, 0.995653875433f,
0.994691598273f, 0.993633450666f, 0.992479534599f, 0.991229961288f, 0.989884851171f,
0.988444333892f, 0.986908548290f, 0.985277642389f, 0.983551773378f, 0.981731107599f,
0.979815820533f, 0.977806096779f, 0.975702130039f, 0.973504123096f, 0.971212287799f,
0.968826845041f, 0.966348024735f, 0.963776065795f, 0.961111216112f, 0.958353732530f,
0.955503880820f, 0.952561935658f, 0.949528180593f, 0.946402908026f, 0.943186419177f,
0.939879024058f, 0.936481041442f, 0.932992798835f, 0.929414632439f, 0.925746887127f,
0.921989916403f, 0.918144082372f, 0.914209755704f, 0.910187315596f, 0.906077149740f,
0.901879654283f, 0.897595233788f, 0.893224301196f, 0.888767277786f, 0.884224593137f,
0.879596685080f, 0.874883999665f, 0.870086991109f, 0.865206121757f, 0.860241862039f,
0.855194690420f, 0.850065093356f, 0.844853565250f, 0.839560608398f, 0.834186732948f,
0.828732456844f, 0.823198305781f, 0.817584813152f, 0.811892519997f, 0.806121974951f,
0.800273734191f, 0.794348361383f, 0.788346427627f, 0.782268511401f, 0.776115198508f,
0.769887082016f, 0.763584762206f, 0.757208846506f, 0.750759949443f, 0.744238692572f,
0.737645704427f, 0.730981620454f, 0.724247082951f, 0.717442741007f, 0.710569250438f,
0.703627273726f, 0.696617479953f, 0.689540544737f, 0.682397150168f, 0.675187984742f,
0.667913743292f, 0.660575126926f, 0.653172842954f, 0.645707604824f, 0.638180132051f,
0.630591150148f, 0.622941390558f, 0.615231590581f, 0.607462493302f, 0.599634847523f,
0.591749407690f, 0.583806933818f, 0.575808191418f, 0.567753951426f, 0.559644990127f,
0.551482089078f, 0.543266035038f, 0.534997619887f, 0.526677640552f, 0.518306898929f,
0.509886201809f, 0.501416360796f, 0.492898192230f, 0.484332517110f, 0.475720161014f,
0.467061954019f, 0.458358730621f, 0.449611329655f, 0.440820594212f, 0.431987371563f,
0.423112513073f, 0.414196874117f, 0.405241314005f, 0.396246695891f, 0.387213886697f,
0.378143757022f, 0.369037181064f, 0.359895036535f, 0.350718204573f, 0.341507569661f,
0.332264019538f, 0.322988445118f, 0.313681740399f, 0.304344802381f, 0.294978530977f,
0.285583828929f, 0.276161601717f, 0.266712757475f, 0.257238206902f, 0.247738863176f,
0.238215641862f, 0.228669460829f, 0.219101240157f, 0.209511902052f, 0.199902370753f,
0.190273572448f, 0.180626435180f, 0.170961888760f, 0.161280864678f, 0.151584296010f,
0.141873117332f, 0.132148264628f, 0.122410675199f, 0.112661287575f, 0.102901041421f,
0.093130877450f, 0.083351737332f, 0.073564563600f, 0.063770299562f, 0.053969889210f,
0.044164277127f, 0.034354408400f, 0.024541228523f, 0.014725683311f, 0.004908718808f
};
/* Pools a one-sided spectrum into overlapping triangular bands centered at
   center_bins, scaling each band by band_weights. x_in and x_out must not alias. */
static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands)
{
    int band, bin;
    float w_lo;
    celt_assert(x_in != x_out)
    x_out[0] = 0;
    for (band = 0; band < num_bands - 1; band++)
    {
        x_out[band + 1] = 0;
        for (bin = center_bins[band]; bin < center_bins[band + 1]; bin++)
        {
            /* linear interpolation weight toward the lower band center */
            w_lo = (float) (center_bins[band + 1] - bin) / (center_bins[band + 1] - center_bins[band]);
            x_out[band] += band_weights[band] * w_lo * x_in[bin];
            x_out[band + 1] += band_weights[band + 1] * (1 - w_lo) * x_in[bin];
        }
    }
    /* the topmost center bin is not visited by the loop above */
    x_out[num_bands - 1] += band_weights[num_bands - 1] * x_in[center_bins[num_bands - 1]];
#ifdef DEBUG_PRINT
    for (band = 0; band < num_bands; band++)
    {
        printf("band[%d]: %f\n", band, x_out[band]);
    }
#endif
}
/* Computes the one-sided magnitude spectrum (161 values) of a 320-sample
   input via forward_transform, rescaled by the window size.
   NOTE(review): the scaling presumably undoes a 1/N normalization inside
   forward_transform — confirm against freq.c. */
static void mag_spec_320_onesided(float *out, float *in)
{
    /* declarations first: keeps the file C90-clean (-Wdeclaration-after-statement);
       previously the celt_assert statement preceded these declarations */
    kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE];
    int k;
    celt_assert(OSCE_SPEC_WINDOW_SIZE == 320);
    forward_transform(buffer, in);
    for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
    {
        out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i);
#ifdef DEBUG_PRINT
        printf("magspec[%d]: %f\n", k, out[k]);
#endif
    }
}
/* Derives the 64-band "clean" log spectrum for one frame from the quantized
   LPC coefficients (Q12): band-pooled inverse magnitude response of the
   prediction-error filter, then compressed-log scaling. */
static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order)
{
    float buffer[OSCE_SPEC_WINDOW_SIZE] = {0};
    int k;
    /* prediction-error filter: 1 followed by the negated LPC taps, zero-padded */
    buffer[0] = 1;
    for (k = 0; k < lpc_order; k++)
    {
        buffer[k + 1] = - (float) a_q12[k] / (1U << 12);
    }
    /* magnitude response of A(z), inverted to obtain the LPC envelope */
    mag_spec_320_onesided(buffer, buffer);
    for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
    {
        buffer[k] = 1.f / (buffer[k] + 1e-9f);
    }
    /* pool into the clean-spec bands */
    apply_filterbank(spec, buffer, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);
    /* log domain with 0.3 compression factor */
    for (k = 0; k < OSCE_CLEAN_SPEC_NUM_BANDS; k++)
    {
        spec[k] = 0.3f * log(spec[k] + 1e-9f);
    }
}
/* Computes the 18-coefficient cepstrum of 320 samples of the (noisy) decoded
   signal: window, magnitude spectrum, band pooling, log, DCT-II.
   cepstrum and signal must not alias. */
static void calculate_cepstrum(float *cepstrum, float *signal)
{
    float buffer[OSCE_SPEC_WINDOW_SIZE];
    /* band energies are accumulated into the tail of buffer, past the
       OSCE_SPEC_NUM_FREQS magnitudes written in-place below
       (161 + 3 + 18 <= 320, so the two regions do not overlap) */
    float *spec = &buffer[OSCE_SPEC_NUM_FREQS + 3];
    int n;
    celt_assert(cepstrum != signal)
    /* apply analysis window */
    for (n = 0; n < OSCE_SPEC_WINDOW_SIZE; n++)
    {
        buffer[n] = osce_window[n] * signal[n];
    }
    /* calculate magnitude spectrum (in-place over the windowed signal) */
    mag_spec_320_onesided(buffer, buffer);
    /* accumulate bands */
    apply_filterbank(spec, buffer, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS);
    /* log domain conversion */
    for (n = 0; n < OSCE_NOISY_SPEC_NUM_BANDS; n++)
    {
        spec[n] = log(spec[n] + 1e-9f);
#ifdef DEBUG_PRINT
        printf("logspec[%d]: %f\n", n, spec[n]);
#endif
    }
    /* DCT-II (orthonormal) */
    celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS);
    dct(cepstrum, spec);
}
/* Computes 5 normalized autocorrelation values of an 80-sample subframe at
   lags lag-2 .. lag+2. Reads signal[-lag-2 .. 79], i.e. the caller must
   provide at least lag+2 samples of history before signal.
   acorr and signal must not alias. */
static void calculate_acorr(float *acorr, float *signal, int lag)
{
    int n, k;
    celt_assert(acorr != signal)
    for (k = -2; k <= 2; k++)
    {
        /* declarations at block start: keeps the file C90-clean
           (-Wdeclaration-after-statement); previously these followed a
           redundant zeroing statement that the final assignment overwrote */
        float xx = 0;
        float xy = 0;
        float yy = 0;
        for (n = 0; n < 80; n++)
        {
            /* obviously wasteful -> fix later */
            xx += signal[n] * signal[n];
            yy += signal[n - lag + k] * signal[n - lag + k];
            xy += signal[n] * signal[n - lag + k];
        }
        /* normalized cross-correlation; 1e-9 guards against division by zero */
        acorr[k+2] = xy / sqrt(xx * yy + 1e-9f);
    }
}
/* Maps the decoder pitch lag to the feature-domain lag: unvoiced frames are
   replaced by OSCE_NO_PITCH_VALUE, optionally holding the last voiced lag for
   up to OSCE_PITCH_HANGOVER unvoiced frames ("hangover"). Returns the
   post-processed lag and updates last_lag/last_type/hangover state. */
static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type)
{
    int new_lag;

#ifdef OSCE_HANGOVER_BUGFIX
#define TESTBIT 1
#else
#define TESTBIT 0
#endif

    /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
    /* NOTE(review): with TESTBIT == 0 the first two branches are dead code and
       every unvoiced frame maps straight to OSCE_NO_PITCH_VALUE */
    if (type != TYPE_VOICED && psFeatures->last_type == TYPE_VOICED && TESTBIT)
    /* enter hangover */
    {
        new_lag = OSCE_NO_PITCH_VALUE;
        if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
        {
            new_lag = psFeatures->last_lag;
            psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
        }
    }
    else if (type != TYPE_VOICED && psFeatures->pitch_hangover_count && TESTBIT)
    /* continue hangover */
    {
        new_lag = psFeatures->last_lag;
        psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
    }
    else if (type != TYPE_VOICED)
    /* unvoiced frame after hangover */
    {
        new_lag = OSCE_NO_PITCH_VALUE;
        psFeatures->pitch_hangover_count = 0;
    }
    else
    /* voiced frame: update last_lag */
    {
        new_lag = lag;
        psFeatures->last_lag = lag;
        psFeatures->pitch_hangover_count = 0;
    }

    /* buffer update */
    psFeatures->last_type = type;

    /* with the current setup this should never happen (but who knows...) */
    celt_assert(new_lag)

    return new_lag;
}
/* Assembles the OSCE feature matrix for one SILK frame: one 93-dimensional
   vector per 80-sample (5 ms) subframe, laid out per the OSCE_*_START/LENGTH
   offsets in osce_config.h. Also outputs per-subframe pitch lags and the
   raw/smoothed payload bit counts, and maintains the signal history. */
void osce_calculate_features(
    silk_decoder_state *psDec, /* I/O Decoder state */
    silk_decoder_control *psDecCtrl, /* I Decoder control */
    float *features, /* O input features */
    float *numbits, /* O numbits and smoothed numbits */
    int *periods, /* O pitch lags on subframe basis */
    const opus_int16 xq[], /* I Decoded speech */
    opus_int32 num_bits /* I Size of SILK payload in bits */
)
{
    int num_subframes, num_samples;
    float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
    float *frame, *pfeatures;
    OSCEFeatureState *psFeatures;
    int i, n, k;
#ifdef WRITE_FEATURES
    static FILE *f_feat = NULL;
    if (f_feat == NULL)
    {
        f_feat = fopen("assembled_features.f32", "wb");
    }
#endif

    memset(buffer, 0, sizeof(buffer));

    num_subframes = psDec->nb_subfr;
    num_samples = num_subframes * 80;
    psFeatures = &psDec->osce.features;

    /* smooth bit count */
    psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits;
    numbits[0] = num_bits;
#ifdef OSCE_NUMBITS_BUGFIX
    numbits[1] = psFeatures->numbits_smooth;
#else
    numbits[1] = num_bits;
#endif

    /* scale decoded speech to [-1, 1) and prepend the stored history */
    for (n = 0; n < num_samples; n++)
    {
        buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
    }
    OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);

    for (k = 0; k < num_subframes; k++)
    {
        pfeatures = features + k * OSCE_FEATURE_DIM;
        frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];
        /* bugfix: clear OSCE_FEATURE_DIM floats, not bytes; the previous
           memset(pfeatures, 0, OSCE_FEATURE_DIM) zeroed only 93 bytes */
        OPUS_CLEAR(pfeatures, OSCE_FEATURE_DIM); /* precaution */

        /* clean spectrum from lpcs (update every other frame) */
        if (k % 2 == 0)
        {
            calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order);
        }
        else
        {
            OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH);
        }

        /* noisy cepstrum from signal (update every other frame); the 320-sample
           analysis window starts 160 samples before the current subframe */
        if (k % 2 == 0)
        {
            calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160);
        }
        else
        {
            OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH);
        }

        /* pitch hangover and zero value replacement */
        periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);

        /* auto-correlation around pitch lag */
        calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]);

        /* ltp coefficients (Q14 -> float) */
        celt_assert(OSCE_LTP_LENGTH == LTP_ORDER)
        for (i = 0; i < OSCE_LTP_LENGTH; i++)
        {
            pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14);
        }

        /* frame gain (Q16 -> log domain) */
        pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f);

#ifdef WRITE_FEATURES
        fwrite(pfeatures, sizeof(*pfeatures), OSCE_FEATURE_DIM, f_feat);
#endif
    }

    /* buffer update */
    OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
}
/* Cross-fades the first 10 ms (160 samples) from the unenhanced signal x_in
   into the enhanced signal x_enhanced in place, using the first half of the
   320-point window as the fade-in curve. */
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
{
    int n;
    float w;
    celt_assert(length >= 160);
    for (n = 0; n < 160; n++)
    {
        w = osce_window[n];
        x_enhanced[n] = w * x_enhanced[n] + (1.f - w) * x_in[n];
    }
}

50
dnn/osce_features.h Normal file
View file

@ -0,0 +1,50 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OSCE_FEATURES_H
#define OSCE_FEATURES_H
#include "structs.h"
#include "opus_types.h"
/* When defined, numbits[1] carries the smoothed bit count instead of
   duplicating the raw value (see osce_calculate_features). */
#define OSCE_NUMBITS_BUGFIX
/* Assembles one 93-dimensional feature vector per 80-sample subframe,
   plus per-subframe pitch lags and raw/smoothed payload bit counts. */
void osce_calculate_features(
    silk_decoder_state *psDec, /* I/O Decoder state */
    silk_decoder_control *psDecCtrl, /* I Decoder control */
    float *features, /* O input features */
    float *numbits, /* O numbits and smoothed numbits */
    int *periods, /* O pitch lags on subframe basis */
    const opus_int16 xq[], /* I Decoded speech */
    opus_int32 num_bits /* I Size of SILK payload in bits */
);
/* Cross-fades the first 160 samples of x_enhanced with x_in in place. */
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length);
#endif

124
dnn/osce_structs.h Normal file
View file

@ -0,0 +1,124 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OSCE_STRUCTS_H
#define OSCE_STRUCTS_H
#include "opus_types.h"
#include "osce_config.h"
#ifndef DISABLE_LACE
#include "lace_data.h"
#endif
#ifndef DISABLE_NOLACE
#include "nolace_data.h"
#endif
#include "nndsp.h"
#include "nnet.h"
/* feature calculation */
typedef struct {
float numbits_smooth;
int pitch_hangover_count;
int last_lag;
int last_type;
float signal_history[OSCE_FEATURES_MAX_HISTORY];
int reset;
} OSCEFeatureState;
#ifndef DISABLE_LACE
/* LACE */
typedef struct {
float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE];
float feature_net_gru_state[LACE_COND_DIM];
AdaCombState cf1_state;
AdaCombState cf2_state;
AdaConvState af1_state;
float preemph_mem;
float deemph_mem;
} LACEState;
typedef struct
{
LACELayers layers;
float window[LACE_OVERLAP_SIZE];
} LACE;
#endif /* #ifndef DISABLE_LACE */
#ifndef DISABLE_NOLACE
/* NoLACE */
/* NoLACE */
/* Runtime (streaming) state of the NoLACE enhancement model.
 * NoLACE extends LACE with additional adaptive conv stages, temporal
 * shaping stages, and post-processing feature buffers. */
typedef struct {
    float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE]; /* streaming buffer for the feature-net conv2 layer */
    float feature_net_gru_state[NOLACE_COND_DIM];                /* hidden state of the feature-net GRU */
    /* conditioning-feature buffers feeding the post_* layers; one vector of
     * COND_DIM features each -- presumably updated per frame, confirm in nolace code */
    float post_cf1_state[NOLACE_COND_DIM];
    float post_cf2_state[NOLACE_COND_DIM];
    float post_af1_state[NOLACE_COND_DIM];
    float post_af2_state[NOLACE_COND_DIM];
    float post_af3_state[NOLACE_COND_DIM];
    AdaCombState cf1_state;      /* adaptive comb filter 1 state */
    AdaCombState cf2_state;      /* adaptive comb filter 2 state */
    AdaConvState af1_state;      /* adaptive conv filter states 1..4 */
    AdaConvState af2_state;
    AdaConvState af3_state;
    AdaConvState af4_state;
    AdaShapeState tdshape1_state; /* temporal-shaping stage states 1..3 */
    AdaShapeState tdshape2_state;
    AdaShapeState tdshape3_state;
    float preemph_mem;           /* pre-emphasis filter memory (single tap) */
    float deemph_mem;            /* de-emphasis filter memory (single tap) */
} NoLACEState;
typedef struct {
NOLACELayers layers;
float window[LACE_OVERLAP_SIZE];
} NoLACE;
#endif /* #ifndef DISABLE_NOLACE */
/* OSCEModel */
/* OSCEModel */
/* Container for all compiled-in OSCE enhancement models. */
typedef struct {
#ifndef DISABLE_LACE
    LACE lace;     /* LACE model weights + window */
#endif
#ifndef DISABLE_NOLACE
    NoLACE nolace; /* NoLACE model weights + window (larger variant) */
#endif
} OSCEModel;
/* Union of per-model runtime states: only one enhancement model is active
 * at a time, so the states may share storage. */
typedef union {
#ifndef DISABLE_LACE
    LACEState lace;     /* active when running the LACE model */
#endif
#ifndef DISABLE_NOLACE
    NoLACEState nolace; /* active when running the NoLACE model */
#endif
} OSCEState;
#endif

View file

@ -0,0 +1,165 @@
"""
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
"""
import os
import argparse
import torch
import numpy as np
from models import model_dict
from utils import endoscopy
# Command-line interface: checkpoint folder in, testvector folder out.
# Fix: the checkpoint_path help string had an unbalanced quote around the
# second checkpoint name ("...and nolace_checkpoint.pth"").
parser = argparse.ArgumentParser()
parser.add_argument('checkpoint_path', type=str, help='path to folder containing checkpoints "lace_checkpoint.pth" and "nolace_checkpoint.pth"')
parser.add_argument('output_folder', type=str, help='output folder for testvectors')
parser.add_argument('--debug', action='store_true', help='add debug output to output folder')
def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False):
    """Run an adaptive conv layer on random input and dump the in/out pair.

    Writes three little-endian float32 files next to `prefix`:
    the conditioning features, the input signal (frame-major), and the
    layer output (frame-major).
    """
    fdim = adaconv.feature_dim
    cin = adaconv.in_channels
    cout = adaconv.out_channels
    fsize = adaconv.frame_size

    cond = torch.randn((1, num_frames, fdim))
    signal = torch.randn((1, cin, num_frames * fsize))
    out = adaconv(signal, cond, debug=debug)

    # reorder to (frame, channel, sample) so the C test reads frame by frame
    cond_np = cond[0].detach().numpy()
    sig_np = signal[0].reshape(cin, num_frames, fsize).permute(1, 0, 2).detach().numpy()
    out_np = out[0].reshape(cout, num_frames, fsize).permute(1, 0, 2).detach().numpy()

    for data, suffix in ((cond_np, '_features.f32'),
                         (sig_np, '_x_in.f32'),
                         (out_np, '_x_out.f32')):
        data.tofile(prefix + suffix)
def create_adacomb_testvector(prefix, adacomb, num_frames, debug=False):
    """Run an adaptive comb filter on random input and dump the in/out pair.

    Writes float32 files for features, input and output, plus the per-frame
    pitch lags as int32 (`_p_in.s32`).  Lags are drawn from
    [kernel_size, 250) so the comb delay is always valid.
    """
    fdim = adacomb.feature_dim
    n_chan = 1
    fsize = adacomb.frame_size

    cond = torch.randn((1, num_frames, fdim))
    signal = torch.randn((1, n_chan, num_frames * fsize))
    lags = torch.randint(adacomb.kernel_size, 250, (1, num_frames))

    out = adacomb(signal, cond, lags, debug=debug)

    cond_np = cond[0].detach().numpy()
    sig_np = signal[0].permute(1, 0).detach().numpy()
    lags_np = lags[0].detach().numpy().astype(np.int32)
    out_np = out[0].permute(1, 0).detach().numpy()

    cond_np.tofile(prefix + '_features.f32')
    sig_np.tofile(prefix + '_x_in.f32')
    lags_np.tofile(prefix + '_p_in.s32')
    out_np.tofile(prefix + '_x_out.f32')
def create_adashape_testvector(prefix, adashape, num_frames):
    """Run an adaptive temporal shaper on random input and dump the in/out pair.

    Single-channel signal; features, input and output are written as
    little-endian float32 files next to `prefix`.
    """
    fdim = adashape.feature_dim
    fsize = adashape.frame_size

    cond = torch.randn((1, num_frames, fdim))
    signal = torch.randn((1, 1, num_frames * fsize))
    out = adashape(signal, cond)

    cond_np = cond[0].detach().numpy()
    sig_np = signal.flatten().detach().numpy()
    out_np = out.flatten().detach().numpy()

    for data, suffix in ((cond_np, '_features.f32'),
                         (sig_np, '_x_in.f32'),
                         (out_np, '_x_out.f32')):
        data.tofile(prefix + suffix)
def create_feature_net_testvector(prefix, model, num_frames):
    """Dump input/output pairs for the conditioning feature network.

    Draws random subframe features, pitch periods and bit counts, runs the
    model's pitch/numbits embeddings and feature net, and writes raw inputs,
    concatenated embedded features, and the network output to disk.
    """
    n_sub = 4 * num_frames  # 4 subframes per frame
    lo = model.numbits_range[0]
    hi = model.numbits_range[1]

    input_features = torch.randn((1, n_sub, model.num_features))
    periods = torch.randint(32, 300, (1, n_sub))
    numbits = lo + torch.rand((1, num_frames, 2)) * (hi - lo)

    # embeddings: pitch at subframe rate, numbits at frame rate repeated x4
    pitch_vec = model.pitch_embedding(periods)
    numbits_vec = torch.repeat_interleave(model.numbits_embedding(numbits).flatten(2), 4, dim=1)

    full_features = torch.cat((input_features, pitch_vec, numbits_vec), dim=-1)
    cond = model.feature_net(full_features)

    input_features.float().numpy().tofile(prefix + "_in_features.f32")
    periods.numpy().astype(np.int32).tofile(prefix + "_periods.s32")
    numbits.float().numpy().tofile(prefix + "_numbits.f32")
    full_features.detach().numpy().tofile(prefix + "_full_features.f32")
    cond.detach().numpy().tofile(prefix + "_out_features.f32")
if __name__ == "__main__":
    args = parser.parse_args()

    # create the output folder (no error if it already exists)
    os.makedirs(args.output_folder, exist_ok=True)

    # load both checkpoints on CPU; filenames are fixed by convention
    lace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "lace_checkpoint.pth"), map_location='cpu')
    nolace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "nolace_checkpoint.pth"), map_location='cpu')

    # instantiate each model with the hyper-parameters stored in its checkpoint
    lace = model_dict['lace'](**lace_checkpoint['setup']['model']['kwargs'])
    nolace = model_dict['nolace'](**nolace_checkpoint['setup']['model']['kwargs'])

    lace.load_state_dict(lace_checkpoint['state_dict'])
    nolace.load_state_dict(nolace_checkpoint['state_dict'])

    # optional layer-probe dumps into the same output folder
    if args.debug:
        endoscopy.init(args.output_folder)

    # lace af1, 1 input channel, 1 output channel
    create_adaconv_testvector(os.path.join(args.output_folder, "lace_af1"), lace.af1, 5, debug=args.debug)

    # nolace af1, 1 input channel, 2 output channels
    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af1"), nolace.af1, 5, debug=args.debug)

    # nolace af4, 2 input channel, 1 output channels
    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af4"), nolace.af4, 5, debug=args.debug)

    # nolace af2, 2 input channel, 2 output channels
    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af2"), nolace.af2, 5, debug=args.debug)

    # lace cf1
    create_adacomb_testvector(os.path.join(args.output_folder, "lace_cf1"), lace.cf1, 5, debug=args.debug)

    # nolace tdshape1
    create_adashape_testvector(os.path.join(args.output_folder, "nolace_tdshape1"), nolace.tdshape1, 5)

    # lace feature net
    create_feature_net_testvector(os.path.join(args.output_folder, 'lace'), lace, 5)

    if args.debug:
        endoscopy.close()

View file

@ -49,7 +49,6 @@ class SilkEnhancementSet(Dataset):
num_bands_noisy_spec=18,
noisy_spec_scale='opus',
noisy_apply_dct=True,
add_offset=False,
add_double_lag_acorr=False,
):
@ -73,7 +72,6 @@ class SilkEnhancementSet(Dataset):
self.gains = np.fromfile(os.path.join(path, 'features_gain.f32'), dtype=np.float32)
self.num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32)
self.num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32)
self.offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)
self.clean_signal_hp = np.fromfile(os.path.join(path, 'clean_hp.s16'), dtype=np.int16)
self.clean_signal = np.fromfile(os.path.join(path, 'clean.s16'), dtype=np.int16)
@ -86,7 +84,6 @@ class SilkEnhancementSet(Dataset):
num_bands_noisy_spec,
noisy_spec_scale,
noisy_apply_dct,
add_offset,
add_double_lag_acorr)
self.history_len = 700 if add_double_lag_acorr else 350
@ -120,8 +117,7 @@ class SilkEnhancementSet(Dataset):
self.lpcs[frame_start : frame_stop],
self.gains[frame_start : frame_stop],
self.ltps[frame_start : frame_stop],
self.periods[frame_start : frame_stop],
self.offsets[frame_start : frame_stop]
self.periods[frame_start : frame_stop]
)
if self.preemph > 0:

View file

@ -40,10 +40,53 @@ import wexchange.torch
from wexchange.torch import dump_torch_weights
from models import model_dict
from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
from utils.layers.td_shaper import TDShaper
from wexchange.torch import dump_torch_weights
parser = argparse.ArgumentParser()
parser.add_argument('checkpoint', type=str, help='LACE or NoLACE model checkpoint')
parser.add_argument('output_dir', type=str, help='output folder')
parser.add_argument('--quantize', action="store_true", help='quantization according to schedule')
schedules = {
'nolace': [
('pitch_embedding', dict()),
('feature_net.conv1', dict()),
('feature_net.conv2', dict(quantize=True, scale=None)),
('feature_net.tconv', dict(quantize=True, scale=None)),
('feature_net.gru', dict()),
('cf1', dict(quantize=True, scale=None)),
('cf2', dict(quantize=True, scale=None)),
('af1', dict(quantize=True, scale=None)),
('tdshape1', dict()),
('tdshape2', dict()),
('tdshape3', dict()),
('af2', dict(quantize=True, scale=None)),
('af3', dict(quantize=True, scale=None)),
('af4', dict(quantize=True, scale=None)),
('post_cf1', dict(quantize=True, scale=None)),
('post_cf2', dict(quantize=True, scale=None)),
('post_af1', dict(quantize=True, scale=None)),
('post_af2', dict(quantize=True, scale=None)),
('post_af3', dict(quantize=True, scale=None))
],
'lace' : [
('pitch_embedding', dict()),
('feature_net.conv1', dict()),
('feature_net.conv2', dict(quantize=True, scale=None)),
('feature_net.tconv', dict(quantize=True, scale=None)),
('feature_net.gru', dict()),
('cf1', dict(quantize=True, scale=None)),
('cf2', dict(quantize=True, scale=None)),
('af1', dict(quantize=True, scale=None))
]
}
# auxiliary functions
@ -60,8 +103,28 @@ def sha1(filename):
return sha1.hexdigest()
def osce_dump_generic(writer, name, module):
    """Recursively export a module tree.

    Leaf layers of a known type are dumped directly; anything else is
    descended into, with child names appended to `name` (and the long
    'feature_net' component shortened to 'fnet').
    """
    dumpable = (torch.nn.Linear, torch.nn.Conv1d, torch.nn.ConvTranspose1d,
                torch.nn.Embedding, LimitedAdaptiveConv1d, LimitedAdaptiveComb1d,
                TDShaper, torch.nn.GRU)

    if isinstance(module, dumpable):
        dump_torch_weights(writer, module, name=name, verbose=True)
    else:
        for child_name, child in module.named_children():
            osce_dump_generic(writer, (name + "_" + child_name).replace("feature_net", "fnet"), child)
def export_name(name):
return name.replace('.', '_')
name = name.replace('.', '_')
name = name.replace('feature_net', 'fnet')
return name
def osce_scheduled_dump(writer, prefix, model, schedule):
    """Export submodules in the order given by `schedule`.

    `schedule` is a list of (submodule_path, kwargs) pairs; kwargs are
    forwarded to the dump routine (e.g. quantization settings).
    """
    if not prefix.endswith('_'):
        prefix = prefix + '_'

    for submodule_path, dump_kwargs in schedule:
        submodule = model.get_submodule(submodule_path)
        dump_torch_weights(writer, submodule, prefix + export_name(submodule_path),
                           verbose=True, **dump_kwargs)
if __name__ == "__main__":
args = parser.parse_args()
@ -76,22 +139,34 @@ if __name__ == "__main__":
# create model and load weights
checkpoint = torch.load(checkpoint_path, map_location='cpu')
model = model_dict[checkpoint['setup']['model']['name']](*checkpoint['setup']['model']['args'], **checkpoint['setup']['model']['kwargs'])
model.load_state_dict(checkpoint['state_dict'])
# CWriter
model_name = checkpoint['setup']['model']['name']
cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper())
cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper() + 'Layers', add_typedef=True)
# dump numbits_embedding parameters by hand
numbits_embedding = model.get_submodule('numbits_embedding')
weights = next(iter(numbits_embedding.parameters()))
for i, c in enumerate(weights):
cwriter.header.write(f"\nNUMBITS_COEF_{i} {float(c.detach())}f")
cwriter.header.write("\n\n")
# Add custom includes and global parameters
cwriter.header.write(f'''
#define {model_name.upper()}_PREEMPH {model.preemph}f
#define {model_name.upper()}_FRAME_SIZE {model.FRAME_SIZE}
#define {model_name.upper()}_OVERLAP_SIZE 40
#define {model_name.upper()}_NUM_FEATURES {model.num_features}
#define {model_name.upper()}_PITCH_MAX {model.pitch_max}
#define {model_name.upper()}_PITCH_EMBEDDING_DIM {model.pitch_embedding_dim}
#define {model_name.upper()}_NUMBITS_RANGE_LOW {model.numbits_range[0]}
#define {model_name.upper()}_NUMBITS_RANGE_HIGH {model.numbits_range[1]}
#define {model_name.upper()}_NUMBITS_EMBEDDING_DIM {model.numbits_embedding_dim}
#define {model_name.upper()}_COND_DIM {model.cond_dim}
#define {model_name.upper()}_HIDDEN_FEATURE_DIM {model.hidden_feature_dim}
''')
for i, s in enumerate(model.numbits_embedding.scale_factors):
cwriter.header.write(f"#define {model_name.upper()}_NUMBITS_SCALE_{i} {float(s.detach().cpu())}f\n")
# dump layers
for name, module in model.named_modules():
if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \
or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding):
dump_torch_weights(cwriter, module, name=export_name(name), verbose=True)
if model_name in schedules and args.quantize:
osce_scheduled_dump(cwriter, model_name, model, schedules[model_name])
else:
osce_dump_generic(cwriter, model_name, model)
cwriter.close()

View file

@ -96,7 +96,7 @@ class LACE(NNSBase):
self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
# spectral shaping
self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
def flop_count(self, rate=16000, verbose=False):

View file

@ -96,8 +96,8 @@ class NoLACE(NNSBase):
# comb filters
left_pad = self.kernel_size // 2
right_pad = self.kernel_size - 1 - left_pad
self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
# spectral shaping
self.af1 = LimitedAdaptiveConv1d(1, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)

View file

@ -41,13 +41,13 @@ class LimitedAdaptiveComb1d(nn.Module):
feature_dim,
frame_size=160,
overlap_size=40,
use_bias=True,
padding=None,
max_lag=256,
name=None,
gain_limit_db=10,
global_gain_limits_db=[-6, 6],
norm_p=2):
norm_p=2,
**kwargs):
"""
Parameters:
@ -87,7 +87,6 @@ class LimitedAdaptiveComb1d(nn.Module):
self.kernel_size = kernel_size
self.frame_size = frame_size
self.overlap_size = overlap_size
self.use_bias = use_bias
self.max_lag = max_lag
self.limit_db = gain_limit_db
self.norm_p = norm_p
@ -101,8 +100,6 @@ class LimitedAdaptiveComb1d(nn.Module):
# network for generating convolution weights
self.conv_kernel = nn.Linear(feature_dim, kernel_size)
if self.use_bias:
self.conv_bias = nn.Linear(feature_dim,1)
# comb filter gain
self.filter_gain = nn.Linear(feature_dim, 1)
@ -154,9 +151,6 @@ class LimitedAdaptiveComb1d(nn.Module):
conv_kernels = self.conv_kernel(features).reshape((batch_size, num_frames, self.out_channels, self.in_channels, self.kernel_size))
conv_kernels = conv_kernels / (1e-6 + torch.norm(conv_kernels, p=self.norm_p, dim=-1, keepdim=True))
if self.use_bias:
conv_biases = self.conv_bias(features).permute(0, 2, 1)
conv_gains = torch.exp(- torch.relu(self.filter_gain(features).permute(0, 2, 1)) + self.log_gain_limit)
# calculate gains
global_conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.global_filter_gain(features).permute(0, 2, 1)) + self.filter_gain_b)
@ -190,10 +184,6 @@ class LimitedAdaptiveComb1d(nn.Module):
new_chunk = torch.conv1d(xx, conv_kernels[:, i, ...].reshape((batch_size * self.out_channels, self.in_channels, self.kernel_size)), groups=batch_size).reshape(batch_size, self.out_channels, -1)
if self.use_bias:
new_chunk = new_chunk + conv_biases[:, :, i : i + 1]
offset = self.max_lag + self.padding[0]
new_chunk = global_conv_gains[:, :, i : i + 1] * (new_chunk * conv_gains[:, :, i : i + 1] + x[..., offset + i * frame_size : offset + (i + 1) * frame_size + overlap_size])
@ -223,10 +213,6 @@ class LimitedAdaptiveComb1d(nn.Module):
count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels
# bias computation
if self.use_bias:
count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead)
# a0 computation
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels

View file

@ -46,12 +46,12 @@ class LimitedAdaptiveConv1d(nn.Module):
feature_dim,
frame_size=160,
overlap_size=40,
use_bias=True,
padding=None,
name=None,
gain_limits_db=[-6, 6],
shape_gain_db=0,
norm_p=2):
norm_p=2,
**kwargs):
"""
Parameters:
@ -90,7 +90,6 @@ class LimitedAdaptiveConv1d(nn.Module):
self.kernel_size = kernel_size
self.frame_size = frame_size
self.overlap_size = overlap_size
self.use_bias = use_bias
self.gain_limits_db = gain_limits_db
self.shape_gain_db = shape_gain_db
self.norm_p = norm_p
@ -104,9 +103,6 @@ class LimitedAdaptiveConv1d(nn.Module):
# network for generating convolution weights
self.conv_kernel = nn.Linear(feature_dim, in_channels * out_channels * kernel_size)
if self.use_bias:
self.conv_bias = nn.Linear(feature_dim, out_channels)
self.shape_gain = min(1, 10**(shape_gain_db / 20))
self.filter_gain = nn.Linear(feature_dim, out_channels)
@ -133,10 +129,6 @@ class LimitedAdaptiveConv1d(nn.Module):
count += 2 * (frame_rate * self.feature_dim * self.kernel_size)
count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)
# bias computation
if self.use_bias:
count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead)
# gain computation
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels
@ -183,9 +175,6 @@ class LimitedAdaptiveConv1d(nn.Module):
conv_kernels = self.shape_gain * conv_kernels + (1 - self.shape_gain) * id_kernels
if self.use_bias:
conv_biases = self.conv_bias(features).permute(0, 2, 1)
# calculate gains
conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.filter_gain(features)) + self.filter_gain_b)
if debug and batch_size == 1:

View file

@ -33,6 +33,7 @@ import numpy as np
import torch
import scipy
import scipy.signal
from utils.pitch import hangover, calculate_acorr_window
from utils.spec import create_filter_bank, cepstrum, log_spectrum, log_spectrum_from_lpc
@ -59,7 +60,6 @@ def silk_feature_factory(no_pitch_value=256,
num_bands_noisy_spec=18,
noisy_spec_scale='opus',
noisy_apply_dct=True,
add_offset=False,
add_double_lag_acorr=False
):
@ -67,7 +67,7 @@ def silk_feature_factory(no_pitch_value=256,
fb_clean_spec = create_filter_bank(num_bands_clean_spec, 320, scale='erb', round_center_bins=True, normalize=True)
fb_noisy_spec = create_filter_bank(num_bands_noisy_spec, 320, scale=noisy_spec_scale, round_center_bins=True, normalize=True)
def create_features(noisy, noisy_history, lpcs, gains, ltps, periods, offsets):
def create_features(noisy, noisy_history, lpcs, gains, ltps, periods):
periods = periods.copy()
@ -89,10 +89,7 @@ def silk_feature_factory(no_pitch_value=256,
acorr, _ = calculate_acorr_window(noisy, 80, periods, noisy_history, radius=acorr_radius, add_double_lag_acorr=add_double_lag_acorr)
if add_offset:
features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains, offsets.reshape(-1, 1)), axis=-1, dtype=np.float32)
else:
features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)
features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)
return features, periods.astype(np.int64)
@ -110,7 +107,6 @@ def load_inference_data(path,
num_bands_noisy_spec=18,
noisy_spec_scale='opus',
noisy_apply_dct=True,
add_offset=False,
add_double_lag_acorr=False,
**kwargs):
@ -122,13 +118,12 @@ def load_inference_data(path,
periods = np.fromfile(os.path.join(path, 'features_period.s16'), dtype=np.int16)
num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32).astype(np.float32).reshape(-1, 1)
num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32).reshape(-1, 1)
offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)
# load signal, add back delay and pre-emphasize
signal = np.fromfile(os.path.join(path, 'noisy.s16'), dtype=np.int16).astype(np.float32) / (2 ** 15)
signal = np.concatenate((np.zeros(skip, dtype=np.float32), signal), dtype=np.float32)
create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_offset, add_double_lag_acorr)
create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_double_lag_acorr)
num_frames = min((len(signal) // 320) * 4, len(lpcs))
signal = signal[: num_frames * 80]
@ -138,11 +133,10 @@ def load_inference_data(path,
periods = periods[: num_frames]
num_bits = num_bits[: num_frames // 4]
num_bits_smooth = num_bits[: num_frames // 4]
offsets = offsets[: num_frames]
numbits = np.repeat(np.concatenate((num_bits, num_bits_smooth), axis=-1, dtype=np.float32), 4, axis=0)
features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods, offsets)
features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods)
if preemph > 0:
signal[1:] -= preemph * signal[:-1]

View file

@ -30,6 +30,7 @@
import math as m
import numpy as np
import scipy
import scipy.fftpack
import torch
def erb(f):

View file

@ -38,7 +38,8 @@ class CWriter:
create_state_struct=False,
enable_binary_blob=True,
model_struct_name="Model",
nnet_header="nnet.h"):
nnet_header="nnet.h",
add_typedef=False):
"""
Writer class for creating souce and header files for weight exports to C
@ -73,6 +74,7 @@ class CWriter:
self.enable_binary_blob = enable_binary_blob
self.create_state_struct = create_state_struct
self.model_struct_name = model_struct_name
self.add_typedef = add_typedef
# for binary blob format, format is key=<layer name>, value=(<layer type>, <init call>)
self.layer_dict = OrderedDict()
@ -119,11 +121,17 @@ f"""
# create model type
if self.enable_binary_blob:
self.header.write(f"\nstruct {self.model_struct_name} {{")
if self.add_typedef:
self.header.write(f"\ntypedef struct {{")
else:
self.header.write(f"\nstruct {self.model_struct_name} {{")
for name, data in self.layer_dict.items():
layer_type = data[0]
self.header.write(f"\n {layer_type} {name};")
self.header.write(f"\n}};\n")
if self.add_typedef:
self.header.write(f"\n}} {self.model_struct_name};\n")
else:
self.header.write(f"\n}};\n")
init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)"
self.header.write(f"\n{init_prototype};\n")

View file

@ -34,3 +34,4 @@ from .torch import dump_torch_gru_weights, load_torch_gru_weights
from .torch import dump_torch_grucell_weights
from .torch import dump_torch_embedding_weights, load_torch_embedding_weights
from .torch import dump_torch_weights, load_torch_weights
from .torch import dump_torch_adaptive_conv1d_weights

View file

@ -28,12 +28,154 @@
"""
import os
import sys
import torch
import numpy as np
sys.path.append(sys.path.append(os.path.join(os.path.dirname(__file__), '../osce')))
try:
import utils.layers as osce_layers
from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
from utils.layers.td_shaper import TDShaper
has_osce=True
except:
has_osce=False
from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer
def dump_torch_adaptive_conv1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):
    """Export a LimitedAdaptiveConv1d layer.

    where    -- CWriter: emit C header defines + dense layers; otherwise a
                directory path that receives raw .npy files
    adaconv  -- the layer to export
    name     -- symbol prefix for the generated C macros/arrays
    scale    -- quantization scale for the kernel-predictor weights
    quantize -- quantize the kernel predictor; pads the tap count to a
                multiple of 8 as required by the quantized kernels
    """

    w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
    b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
    w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
    b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()

    if isinstance(where, CWriter):
        # pad kernel for quantization
        left_padding = adaconv.padding[0]
        kernel_size = adaconv.kernel_size
        in_channels = adaconv.in_channels
        out_channels = adaconv.out_channels
        feature_dim = adaconv.feature_dim

        if quantize and kernel_size % 8:
            # zero-pad taps on the left to a multiple of 8 and grow the
            # bookkeeping (left_padding / kernel_size) to match
            kernel_padding = 8 - (kernel_size % 8)
            w_kernel = np.concatenate(
                (np.zeros((out_channels, in_channels, kernel_padding, feature_dim)), w_kernel.reshape(out_channels, in_channels, kernel_size, feature_dim)),
                dtype=w_kernel.dtype,
                axis=2).reshape(-1, feature_dim)
            b_kernel = np.concatenate(
                (np.zeros((out_channels, in_channels, kernel_padding)), b_kernel.reshape(out_channels, in_channels, kernel_size)),
                dtype=b_kernel.dtype,
                axis=2).reshape(-1)
            left_padding += kernel_padding
            kernel_size += kernel_padding

        # write relevant scalar parameters to header file
        where.header.write(f"""
#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
#define {name.upper()}_SHAPE_GAIN {adaconv.shape_gain:f}f
#define {name.upper()}_KERNEL_SIZE {kernel_size}
#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
#define {name.upper()}_LEFT_PADDING {left_padding}
#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
#define {name.upper()}_NORM_P {adaconv.norm_p}
#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
"""
        )

        print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
        print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
    else:
        # Fix: np.save's signature is np.save(file, arr, allow_pickle=...).
        # The previous 3-positional form saved the *filename string* into
        # `where` and passed the weight array as allow_pickle.  Join the
        # directory with each filename instead.
        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)
        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)
        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)
        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)
def dump_torch_adaptive_comb1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):
    """Export a LimitedAdaptiveComb1d layer.

    where    -- CWriter: emit C header defines + dense layers; otherwise a
                directory path that receives raw .npy files
    adaconv  -- the comb-filter layer to export
    name     -- symbol prefix for the generated C macros/arrays
    scale    -- quantization scale for the kernel-predictor weights
    quantize -- quantize the kernel predictor; pads the tap count to a
                multiple of 8 as required by the quantized kernels
    """

    w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
    b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
    w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
    b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()
    w_global_gain = adaconv.global_filter_gain.weight.detach().cpu().numpy().copy()
    b_global_gain = adaconv.global_filter_gain.bias.detach().cpu().numpy().copy()

    if isinstance(where, CWriter):
        # pad kernel for quantization
        left_padding = adaconv.padding[0]
        kernel_size = adaconv.kernel_size

        if quantize and w_kernel.shape[0] % 8:
            # zero-pad taps on the left to a multiple of 8 and grow the
            # bookkeeping (left_padding / kernel_size) to match
            kernel_padding = 8 - (w_kernel.shape[0] % 8)
            w_kernel = np.concatenate((np.zeros((kernel_padding, w_kernel.shape[1])), w_kernel), dtype=w_kernel.dtype)
            b_kernel = np.concatenate((np.zeros((kernel_padding)), b_kernel), dtype=b_kernel.dtype)
            left_padding += kernel_padding
            kernel_size += kernel_padding

        # write relevant scalar parameters to header file
        where.header.write(f"""
#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
#define {name.upper()}_LOG_GAIN_LIMIT {adaconv.log_gain_limit:f}f
#define {name.upper()}_KERNEL_SIZE {kernel_size}
#define {name.upper()}_LEFT_PADDING {left_padding}
#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
#define {name.upper()}_NORM_P {adaconv.norm_p}
#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
#define {name.upper()}_MAX_LAG {adaconv.max_lag}
"""
        )

        print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
        print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
        print_dense_layer(where, name + "_global_gain", w_global_gain, b_global_gain, format='torch', sparse=False, diagonal=False, quantize=False)
    else:
        # Fix: np.save's signature is np.save(file, arr, allow_pickle=...).
        # The previous 3-positional form saved the *filename string* into
        # `where` and passed the weight array as allow_pickle.  Join the
        # directory with each filename instead.
        np.save(os.path.join(where, 'weight_kernel.npy'), w_kernel)
        np.save(os.path.join(where, 'bias_kernel.npy'), b_kernel)
        np.save(os.path.join(where, 'weight_gain.npy'), w_gain)
        np.save(os.path.join(where, 'bias_gain.npy'), b_gain)
        np.save(os.path.join(where, 'weight_global_gain.npy'), w_global_gain)
        np.save(os.path.join(where, 'bias_global_gain.npy'), b_global_gain)
def dump_torch_tdshaper(where, shaper, name='tdshaper'):
    """Export a TDShaper module: scalar parameters as C defines (CWriter
    targets only), then its conv1d sub-layers via dump_torch_conv1d_weights.
    The extra alpha1b/1c/2b/2c branches are dumped only when `innovate` is set.
    """
    if isinstance(where, CWriter):
        where.header.write(f"""
#define {name.upper()}_FEATURE_DIM {shaper.feature_dim}
#define {name.upper()}_FRAME_SIZE {shaper.frame_size}
#define {name.upper()}_AVG_POOL_K {shaper.avg_pool_k}
#define {name.upper()}_INNOVATE {1 if shaper.innovate else 0}
#define {name.upper()}_POOL_AFTER {1 if shaper.pool_after else 0}
"""
        )

    dump_torch_conv1d_weights(where, shaper.feature_alpha1, name + "_alpha1")
    dump_torch_conv1d_weights(where, shaper.feature_alpha2, name + "_alpha2")

    if shaper.innovate:
        for tag in ("_alpha1b", "_alpha1c", "_alpha2b", "_alpha2c"):
            dump_torch_conv1d_weights(where, getattr(shaper, "feature" + tag), name + tag)
def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):
assert gru.num_layers == 1
@ -221,7 +363,6 @@ def load_torch_conv2d_weights(where, conv):
def dump_torch_embedding_weights(where, embed, name='embed', scale=1/128, sparse=False, diagonal=False, quantize=False):
print("quantize = ", quantize)
w = embed.weight.detach().cpu().numpy().copy().transpose()
b = np.zeros(w.shape[0], dtype=w.dtype)
@ -257,11 +398,21 @@ def dump_torch_weights(where, module, name=None, verbose=False, **kwargs):
elif isinstance(module, torch.nn.Conv2d):
return dump_torch_conv2d_weights(where, module, name, **kwargs)
elif isinstance(module, torch.nn.Embedding):
return dump_torch_embedding_weights(where, module)
return dump_torch_embedding_weights(where, module, name, **kwargs)
elif isinstance(module, torch.nn.ConvTranspose1d):
return dump_torch_tconv1d_weights(where, module, name, **kwargs)
else:
raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
if has_osce:
if isinstance(module, LimitedAdaptiveConv1d):
dump_torch_adaptive_conv1d_weights(where, module, name, **kwargs)
elif isinstance(module, LimitedAdaptiveComb1d):
dump_torch_adaptive_comb1d_weights(where, module, name, **kwargs)
elif isinstance(module, TDShaper):
dump_torch_tdshaper(where, module, name, **kwargs)
else:
raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
else:
raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
def load_torch_weights(where, module):
""" generic function for loading weights of some torch.nn.Module """

View file

@ -46,6 +46,10 @@
#include "plc_data.c"
#include "dred_rdovae_enc_data.c"
#include "dred_rdovae_dec_data.c"
#ifdef ENABLE_OSCE
#include "lace_data.c"
#include "nolace_data.c"
#endif
void write_weights(const WeightArray *list, FILE *fout)
{
@ -53,6 +57,9 @@ void write_weights(const WeightArray *list, FILE *fout)
unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
while (list[i].name != NULL) {
WeightHead h;
if (strlen(list[i].name) >= sizeof(h.name) - 1) {
printf("[write_weights] warning: name %s too long\n", list[i].name);
}
memcpy(h.head, "DNNw", 4);
h.version = WEIGHT_BLOB_VERSION;
h.type = list[i].type;
@ -77,6 +84,14 @@ int main(void)
write_weights(lpcnet_plc_arrays, fout);
write_weights(rdovaeenc_arrays, fout);
write_weights(rdovaedec_arrays, fout);
#ifdef ENABLE_OSCE
#ifndef DISABLE_LACE
write_weights(lacelayers_arrays, fout);
#endif
#ifndef DISABLE_NOLACE
write_weights(nolacelayers_arrays, fout);
#endif
#endif
fclose(fout);
return 0;
}

View file

@ -29,3 +29,12 @@ dnn/dred_rdovae_enc_data.h \
dnn/dred_rdovae_dec.h \
dnn/dred_rdovae_dec_data.h \
dnn/dred_rdovae_stats_data.h
OSCE_HEAD= \
dnn/osce.h \
dnn/osce_config.h \
dnn/osce_structs.h \
dnn/osce_features.h \
dnn/nndsp.h \
dnn/lace_data.h \
dnn/nolace_data.h

View file

@ -23,6 +23,13 @@ silk/dred_encoder.c \
silk/dred_coding.c \
silk/dred_decoder.c
OSCE_SOURCES = \
dnn/osce.c \
dnn/osce_features.c \
dnn/nndsp.c \
dnn/lace_data.c \
dnn/nolace_data.c
DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c
DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c

View file

@ -148,6 +148,7 @@ opts = [
[ 'float-approx', 'FLOAT_APPROX' ],
[ 'enable-deep-plc', 'ENABLE_DEEP_PLC' ],
[ 'enable-dred', 'ENABLE_DRED' ],
[ 'enable-osce', 'ENABLE_OSCE' ],
[ 'assertions', 'ENABLE_ASSERTIONS' ],
[ 'hardening', 'ENABLE_HARDENING' ],
[ 'fuzzing', 'FUZZING' ],

View file

@ -9,6 +9,7 @@ option('intrinsics', type : 'feature', value : 'auto', description : 'Intrinsics
option('enable-deep-plc', type : 'boolean', value : false, description : 'Enable Deep Packet Loss Concealment (PLC)')
option('enable-dred', type : 'boolean', value : false, description : 'Enable Deep Redundancy (DRED)')
option('enable-osce', type : 'boolean', value : false, description : 'Enable Opus Speech Coding Enhancement (OSCE)')
option('enable-dnn-debug-float', type : 'boolean', value : false, description : 'Compute DNN using float weights')
option('custom-modes', type : 'boolean', value : false, description : 'Enable non-Opus modes, e.g. 44.1 kHz & 2^n frames')

View file

@ -92,6 +92,16 @@ opus_int silk_Encode( /* O Returns error co
/* Decoder functions */
/****************************************/
/***********************************************/
/* Load OSCE models from external data pointer */
/***********************************************/
opus_int silk_LoadOSCEModels(
void *decState, /* O I/O State */
const unsigned char *data, /* I pointer to binary blob */
int len /* I length of binary blob data */
);
/***********************************************/
/* Get size in bytes of the Silk decoder state */
/***********************************************/
@ -100,8 +110,12 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co
);
/*************************/
/* Init or Reset decoder */
/* Init and Reset decoder */
/*************************/
opus_int silk_ResetDecoder( /* O Returns error code */
void *decState /* I/O State */
);
opus_int silk_InitDecoder( /* O Returns error code */
void *decState /* I/O State */
);

View file

@ -147,6 +147,11 @@ typedef struct {
/* I: Enable Deep PLC */
opus_int enable_deep_plc;
#ifdef ENABLE_OSCE
/* I: OSCE method */
opus_int osce_method;
#endif
} silk_DecControlStruct;
#ifdef __cplusplus

View file

@ -33,6 +33,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
#include "os_support.h"
#ifdef ENABLE_OSCE
#include "osce.h"
#include "osce_structs.h"
#endif
/************************/
/* Decoder Super Struct */
/************************/
@ -42,12 +47,33 @@ typedef struct {
opus_int nChannelsAPI;
opus_int nChannelsInternal;
opus_int prev_decode_only_middle;
#ifdef ENABLE_OSCE
OSCEModel osce_model;
#endif
} silk_decoder;
/*********************/
/* Decoder functions */
/*********************/
/* Load OSCE model weights from a binary blob into the decoder super struct.
 * When OSCE is compiled out this is a no-op that reports success. */
opus_int silk_LoadOSCEModels(void *decState, const unsigned char *data, int len)
{
#ifdef ENABLE_OSCE
    /* Forward the blob to the OSCE loader; its status is our return code. */
    return osce_load_models( &((silk_decoder *)decState)->osce_model, data, len );
#else
    /* OSCE disabled at build time: ignore all arguments. */
    (void) decState;
    (void) data;
    (void) len;
    return SILK_NO_ERROR;
#endif
}
opus_int silk_Get_Decoder_Size( /* O Returns error code */
opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
)
@ -60,6 +86,24 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co
}
/* Reset decoder state */
/* Resets all per-channel decoder states plus the stereo unmixing state,
 * without touching anything that must survive a reset (e.g. loaded models). */
opus_int silk_ResetDecoder( /* O Returns error code */
void *decState /* I/O State */
)
{
opus_int n, ret = SILK_NO_ERROR;
silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
/* NOTE(review): ret is overwritten each iteration, so only the last
 * channel's status is reported — earlier failures are lost. Confirm
 * this matches the intended error-reporting contract. */
for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
ret = silk_reset_decoder( &channel_state[ n ] );
}
/* Clear the stereo prediction/unmixing state. */
silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
/* Not strictly needed, but it's cleaner that way */
((silk_decoder *)decState)->prev_decode_only_middle = 0;
return ret;
}
opus_int silk_InitDecoder( /* O Returns error code */
void *decState /* I/O State */
)
@ -67,6 +111,11 @@ opus_int silk_InitDecoder( /* O Returns error co
opus_int n, ret = SILK_NO_ERROR;
silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
#ifndef USE_WEIGHTS_FILE
/* load osce models */
silk_LoadOSCEModels(decState, NULL, 0);
#endif
for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
ret = silk_init_decoder( &channel_state[ n ] );
}
@ -301,9 +350,17 @@ opus_int silk_Decode( /* O Returns error co
} else {
condCoding = CODE_CONDITIONALLY;
}
#ifdef ENABLE_OSCE
if ( channel_state[n].osce.method != decControl->osce_method ) {
osce_reset( &channel_state[n].osce, decControl->osce_method );
}
#endif
ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding,
#ifdef ENABLE_DEEP_PLC
n == 0 ? lpcnet : NULL,
#endif
#ifdef ENABLE_OSCE
&psDec->osce_model,
#endif
arch);
} else {

View file

@ -33,6 +33,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
#include "PLC.h"
#ifdef ENABLE_OSCE
#include "osce.h"
#endif
/****************/
/* Decode frame */
/****************/
@ -45,17 +49,26 @@ opus_int silk_decode_frame(
opus_int condCoding, /* I The type of conditional coding to use */
#ifdef ENABLE_DEEP_PLC
LPCNetPLCState *lpcnet,
#endif
#ifdef ENABLE_OSCE
OSCEModel *osce_model,
#endif
int arch /* I Run-time architecture */
)
{
VARDECL( silk_decoder_control, psDecCtrl );
opus_int L, mv_len, ret = 0;
#ifdef ENABLE_OSCE
opus_int32 ec_start;
#endif
SAVE_STACK;
L = psDec->frame_length;
ALLOC( psDecCtrl, 1, silk_decoder_control );
psDecCtrl->LTP_scale_Q14 = 0;
#ifdef ENABLE_OSCE
ec_start = ec_tell(psRangeDec);
#endif
/* Safety checks */
celt_assert( L > 0 && L <= MAX_FRAME_LENGTH );
@ -87,6 +100,21 @@ opus_int silk_decode_frame(
/********************************************************/
silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch );
/*************************/
/* Update output buffer. */
/*************************/
celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
mv_len = psDec->ltp_mem_length - psDec->frame_length;
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
#ifdef ENABLE_OSCE
/********************************************************/
/* Run SILK enhancer */
/********************************************************/
osce_enhance_frame( osce_model, psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch );
#endif
/********************************************************/
/* Update PLC state */
/********************************************************/
@ -109,15 +137,18 @@ opus_int silk_decode_frame(
lpcnet,
#endif
arch );
}
/*************************/
/* Update output buffer. */
/*************************/
celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
mv_len = psDec->ltp_mem_length - psDec->frame_length;
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
#ifdef ENABLE_OSCE
osce_reset( &psDec->osce, psDec->osce.method );
#endif
/*************************/
/* Update output buffer. */
/*************************/
celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
mv_len = psDec->ltp_mem_length - psDec->frame_length;
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
}
/************************************************/
/* Comfort noise generation / estimation */

View file

@ -31,15 +31,21 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
#ifdef ENABLE_OSCE
#include "osce.h"
#endif
#include "structs.h"
/************************/
/* Init Decoder State */
/* Reset Decoder State */
/************************/
opus_int silk_init_decoder(
opus_int silk_reset_decoder(
silk_decoder_state *psDec /* I/O Decoder state pointer */
)
{
/* Clear the entire encoder state, except anything copied */
silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
silk_memset( &psDec->SILK_DECODER_STATE_RESET_START, 0, sizeof( silk_decoder_state ) - ((char*) &psDec->SILK_DECODER_STATE_RESET_START - (char*)psDec) );
/* Used to deactivate LSF interpolation */
psDec->first_frame_after_reset = 1;
@ -52,6 +58,27 @@ opus_int silk_init_decoder(
/* Reset PLC state */
silk_PLC_Reset( psDec );
#ifdef ENABLE_OSCE
/* Reset OSCE state and method */
osce_reset(&psDec->osce, OSCE_DEFAULT_METHOD);
#endif
return 0;
}
/************************/
/* Init Decoder State   */
/************************/
/* Full initialization: zero the whole decoder state, then run the shared
 * reset path, which re-establishes the fields that must start non-zero. */
opus_int silk_init_decoder(
    silk_decoder_state *psDec /* I/O Decoder state pointer */
)
{
    silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
    silk_reset_decoder( psDec );

    return 0;
}

View file

@ -389,6 +389,10 @@ void silk_NLSF_decode(
/****************************************************/
/* Decoder Functions */
/****************************************************/
opus_int silk_reset_decoder(
silk_decoder_state *psDec /* I/O Decoder state pointer */
);
opus_int silk_init_decoder(
silk_decoder_state *psDec /* I/O Decoder state pointer */
);
@ -412,6 +416,9 @@ opus_int silk_decode_frame(
opus_int condCoding, /* I The type of conditional coding to use */
#ifdef ENABLE_DEEP_PLC
LPCNetPLCState *lpcnet,
#endif
#ifdef ENABLE_OSCE
OSCEModel *osce_model,
#endif
int arch /* I Run-time architecture */
);

View file

@ -44,6 +44,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "dred_decoder.h"
#endif
#ifdef ENABLE_OSCE
#include "osce_config.h"
#include "osce_structs.h"
#endif
#ifdef __cplusplus
extern "C"
{
@ -238,6 +243,14 @@ typedef struct {
} silk_encoder_state;
#ifdef ENABLE_OSCE
typedef struct {
OSCEFeatureState features;
OSCEState state;
int method;
} silk_OSCE_struct;
#endif
/* Struct for Packet Loss Concealment */
typedef struct {
opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */
@ -270,6 +283,10 @@ typedef struct {
/* Decoder state */
/********************************/
typedef struct {
#ifdef ENABLE_OSCE
silk_OSCE_struct osce;
#endif
#define SILK_DECODER_STATE_RESET_START prev_gain_Q16
opus_int32 prev_gain_Q16;
opus_int32 exc_Q14[ MAX_FRAME_LENGTH ];
opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ];

View file

@ -161,4 +161,4 @@ silk/float/schur_FLP.c \
silk/float/sort_FLP.c
SILK_SOURCES_FLOAT_AVX2 = \
silk/float/x86/inner_product_FLP_avx2.c
silk/float/x86/inner_product_FLP_avx2.c

View file

@ -57,6 +57,10 @@
#include "dred_rdovae_dec.h"
#endif
#ifdef ENABLE_OSCE
#include "osce.h"
#endif
struct OpusDecoder {
int celt_dec_offset;
int silk_dec_offset;
@ -383,7 +387,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
pcm_ptr = pcm_silk;
if (st->prev_mode==MODE_CELT_ONLY)
silk_InitDecoder( silk_dec );
silk_ResetDecoder( silk_dec );
/* The SILK PLC cannot produce frames of less than 10 ms */
st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);
@ -408,6 +412,15 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
}
}
st->DecControl.enable_deep_plc = st->complexity >= 5;
#ifdef ENABLE_OSCE
st->DecControl.osce_method = OSCE_METHOD_NONE;
#ifndef DISABLE_LACE
if (st->complexity >= 6) {st->DecControl.osce_method = OSCE_METHOD_LACE;}
#endif
#ifndef DISABLE_NOLACE
if (st->complexity >= 7) {st->DecControl.osce_method = OSCE_METHOD_NOLACE;}
#endif
#endif
lost_flag = data == NULL ? 1 : 2 * !!decode_fec;
decoded_samples = 0;
@ -953,7 +966,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
((char*)&st->OPUS_DECODER_RESET_START - (char*)st));
celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
silk_InitDecoder( silk_dec );
silk_ResetDecoder( silk_dec );
st->stream_channels = st->channels;
st->frame_size = st->Fs/400;
#ifdef ENABLE_DEEP_PLC
@ -1044,6 +1057,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
goto bad_arg;
}
ret = lpcnet_plc_load_model(&st->lpcnet, data, len);
ret = silk_LoadOSCEModels(silk_dec, data, len) || ret;
}
break;
#endif

View file

@ -70,6 +70,10 @@ unsigned char *load_blob(const char *filename, int *len) {
FILE *file;
unsigned char *data;
file = fopen(filename, "r");
if (file == NULL)
{
perror("could not open blob file\n");
}
fseek(file, 0L, SEEK_END);
*len = ftell(file);
fseek(file, 0L, SEEK_SET);
@ -254,6 +258,68 @@ static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *err
}
#endif
#ifdef ENABLE_OSCE_TRAINING_DATA
#define COMPLEXITY_MIN 0
#define COMPLEXITY_MAX 10
#define PACKET_LOSS_PERC_MIN 0
#define PACKET_LOSS_PERC_MAX 50
#define PACKET_LOSS_PERC_STEP 5
#define CBR_BITRATE_LIMIT 8000
#define NUM_BITRATES 102
static int bitrates[NUM_BITRATES] = {
6000, 6060, 6120, 6180, 6240, 6300, 6360, 6420, 6480,
6525, 6561, 6598, 6634, 6670, 6707, 6743, 6780, 6816,
6853, 6889, 6926, 6962, 6999, 7042, 7085, 7128, 7171,
7215, 7258, 7301, 7344, 7388, 7431, 7474, 7512, 7541,
7570, 7599, 7628, 7657, 7686, 7715, 7744, 7773, 7802,
7831, 7860, 7889, 7918, 7947, 7976, 8013, 8096, 8179,
8262, 8344, 8427, 8511, 8605, 8699, 8792, 8886, 8980,
9100, 9227, 9354, 9480, 9561, 9634, 9706, 9779, 9851,
9924, 9996, 10161, 10330, 10499, 10698, 10898, 11124, 11378,
11575, 11719, 11862, 12014, 12345, 12751, 13195, 13561, 13795,
14069, 14671, 15403, 15790, 16371, 17399, 17968, 19382, 20468,
22000, 32000, 64000
};
/* Draw a pseudo-random value from the grid {min, min+step, min+2*step, ...}
 * that never exceeds max. Assumes step > 0 and min <= max; randomness comes
 * from rand(), so seeding/quality follow the C library PRNG. */
static int randint(int min, int max, int step)
{
    /* Uniform real in [0, 1). */
    double u = ((double) rand()) / (RAND_MAX + 1.);
    /* Scale to the number of grid points, floor via the int cast, snap back. */
    return min + step * (int) ((max + 1 - min) * u / step);
}
/* Pick a random bitrate/complexity/packet-loss/VBR combination and apply it
 * to the encoder; used to diversify OSCE training data. */
static void new_random_setting(OpusEncoder *enc)
{
    int bitrate_bps      = bitrates[randint(0, NUM_BITRATES - 1, 1)];
    int complexity       = randint(COMPLEXITY_MIN, COMPLEXITY_MAX, 1);
    int packet_loss_perc = randint(PACKET_LOSS_PERC_MIN, PACKET_LOSS_PERC_MAX, PACKET_LOSS_PERC_STEP);
    /* Very low rates are always VBR; otherwise flip a coin. */
    int use_vbr          = (bitrate_bps < CBR_BITRATE_LIMIT) ? 1 : randint(0, 1, 1);

    printf("changing settings to %d\t%d\t%d\t%d\n", bitrate_bps, complexity, packet_loss_perc, use_vbr);

    opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
    opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
    opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc));
    opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr));
}
#endif
int main(int argc, char *argv[])
{
int err;
@ -316,6 +382,10 @@ int main(int argc, char *argv[])
int lost_count=0;
FILE *packet_loss_file=NULL;
int dred_duration=0;
#ifdef ENABLE_OSCE_TRAINING_DATA
int silk_random_switching = 0;
int silk_frame_counter = 0;
#endif
#ifdef USE_WEIGHTS_FILE
int blob_len;
unsigned char *blob_data;
@ -546,6 +616,12 @@ int main(int argc, char *argv[])
mode_list = celt_hq_test;
nb_modes_in_list = 4;
args++;
#ifdef ENABLE_OSCE_TRAINING_DATA
} else if( strcmp( argv[ args ], "-silk_random_switching" ) == 0 ){
silk_random_switching = atoi( argv[ args + 1 ] );
printf("switching encoding parameters every %dth frame\n", silk_random_switching);
args += 2;
#endif
} else {
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
print_usage( argv );
@ -759,6 +835,15 @@ int main(int argc, char *argv[])
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
frame_size = mode_list[curr_mode][2];
}
#ifdef ENABLE_OSCE_TRAINING_DATA
if (silk_random_switching)
{
silk_frame_counter += 1;
if (silk_frame_counter % silk_random_switching == 0) {
new_random_setting(enc);
}
}
#endif
num_read = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
curr_read = (int)num_read;
tot_in += curr_read;

View file

@ -50,6 +50,9 @@
#else
#include "float/structs_FLP.h"
#endif
#ifdef ENABLE_OSCE_TRAINING_DATA
#include <stdio.h>
#endif
#define MAX_ENCODER_BUFFER 480
@ -1693,6 +1696,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->application == OPUS_APPLICATION_VOIP)
{
hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
#ifdef ENABLE_OSCE_TRAINING_DATA
/* write out high pass filtered clean signal*/
static FILE *fout =NULL;
if (fout == NULL)
{
fout = fopen("clean_hp.s16", "wb");
}
{
int idx;
opus_int16 tmp;
for (idx = 0; idx < frame_size; idx++)
{
tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);
fwrite(&tmp, sizeof(tmp), 1, fout);
}
}
#endif
} else {
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
}
@ -2909,7 +2931,9 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
{
goto bad_arg;
}
#ifdef ENABLE_DRED
ret = dred_encoder_load_model(&st->dred_encoder, data, len);
#endif
}
break;
#endif

View file

@ -103,7 +103,7 @@ opus_int32 test_dec_api(void)
for(c=0;c<4;c++)
{
i=opus_decoder_get_size(c);
if(((c==1||c==2)&&(i<=2048||i>1<<17))||((c!=1&&c!=2)&&i!=0))test_failed();
if(((c==1||c==2)&&(i<=2048||i>1<<18))||((c!=1&&c!=2)&&i!=0))test_failed();
fprintf(stdout," opus_decoder_get_size(%d)=%d ...............%s OK.\n",c,i,i>0?"":"....");
cfgs++;
}
@ -367,7 +367,7 @@ opus_int32 test_msdec_api(void)
for(b=-1;b<4;b++)
{
i=opus_multistream_decoder_get_size(a,b);
if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<17)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed();
if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<18)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed();
fprintf(stdout," opus_multistream_decoder_get_size(%2d,%2d)=%d %sOK.\n",a,b,i,i>0?"":"... ");
cfgs++;
}