mirror of
https://github.com/xiph/opus.git
synced 2025-05-14 15:38:32 +00:00
Merge LACE/NoLACE under OSCE framework
This commit is contained in:
parent
591c8bad70
commit
7d328f5bfa
49 changed files with 4061 additions and 103 deletions
8
.github/workflows/autotools.yml
vendored
8
.github/workflows/autotools.yml
vendored
|
@ -29,6 +29,12 @@ jobs:
|
|||
compiler: gcc,
|
||||
buildconfig: --enable-assertions --enable-custom-modes
|
||||
}
|
||||
- {
|
||||
name: "Linux/GCC/EnableDNN",
|
||||
os: ubuntu-latest,
|
||||
compiler: gcc,
|
||||
buildconfig: --enable-assertions --enable-custom-modes --enable-dred --enable-osce
|
||||
}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
# No AutoMake on Mac so let's install it
|
||||
|
@ -42,4 +48,4 @@ jobs:
|
|||
- name: Build
|
||||
run: make -j 2
|
||||
- name: Test
|
||||
run: make check -j 2
|
||||
run: make check -j 2
|
||||
|
|
2
.github/workflows/dred.yml
vendored
2
.github/workflows/dred.yml
vendored
|
@ -74,7 +74,7 @@ jobs:
|
|||
run: mkdir build
|
||||
- name: Configure
|
||||
working-directory: ./build
|
||||
run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON
|
||||
run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON
|
||||
- name: Build
|
||||
working-directory: ./build
|
||||
run: cmake --build . -j 2 --config ${{ matrix.config.config }} --target package
|
||||
|
|
|
@ -64,9 +64,9 @@ autoconf:
|
|||
- !reference [.snippets, git_prep]
|
||||
script:
|
||||
- ./autogen.sh
|
||||
- CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx
|
||||
- CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce
|
||||
- make -j16
|
||||
- DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16
|
||||
- DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx --enable-dred --enable-osce CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16
|
||||
cache:
|
||||
paths:
|
||||
- "src/*.o"
|
||||
|
@ -87,7 +87,7 @@ cmake:
|
|||
script:
|
||||
- ./autogen.sh
|
||||
- mkdir build
|
||||
- cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_X86_PRESUME_AVX2=ON
|
||||
- cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON -DOPUS_X86_PRESUME_AVX2=ON
|
||||
- cmake --build build
|
||||
- cd build && ctest --output-on-failure -j 16
|
||||
|
||||
|
@ -101,7 +101,7 @@ cmake:
|
|||
script:
|
||||
- ./autogen.sh
|
||||
- mkdir builddir
|
||||
- meson setup -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir
|
||||
- meson setup -Denable-deep-plc=true -Denable-osce=true -Denable-dred=true -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir
|
||||
- meson compile -C builddir
|
||||
- meson test -C builddir
|
||||
#- meson dist --no-tests -C builddir
|
||||
|
|
|
@ -87,6 +87,10 @@ set(OPUS_DRED_HELP_STR "enable DRED.")
|
|||
option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF)
|
||||
add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR})
|
||||
|
||||
set(OPUS_OSCE_HELP_STR "enable OSCE.")
|
||||
option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF)
|
||||
add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR})
|
||||
|
||||
if(APPLE)
|
||||
set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.")
|
||||
option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF)
|
||||
|
@ -364,8 +368,6 @@ endif()
|
|||
|
||||
add_sources_group(opus silk ${silk_headers} ${silk_sources})
|
||||
add_sources_group(opus celt ${celt_headers} ${celt_sources})
|
||||
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
|
||||
add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
|
||||
|
||||
if(OPUS_FIXED_POINT)
|
||||
add_sources_group(opus silk ${silk_sources_fixed})
|
||||
|
@ -380,11 +382,26 @@ if(NOT OPUS_ENABLE_FLOAT_API)
|
|||
target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
|
||||
endif()
|
||||
|
||||
if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE)
|
||||
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
|
||||
set(OPUS_DNN TRUE)
|
||||
else()
|
||||
set(OPUS_DNN FALSE)
|
||||
endif()
|
||||
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
|
||||
target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
|
||||
endif()
|
||||
|
||||
if (OPUS_DRED)
|
||||
add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
|
||||
target_compile_definitions(opus PRIVATE ENABLE_DRED)
|
||||
if(NOT OPUS_DEEP_PLC)
|
||||
target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (OPUS_OSCE)
|
||||
add_sources_group(opus lpcnet ${osce_headers} ${osce_sources})
|
||||
target_compile_definitions(opus PRIVATE ENABLE_OSCE)
|
||||
endif()
|
||||
|
||||
if(NOT OPUS_DISABLE_INTRINSICS)
|
||||
|
@ -405,7 +422,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
|||
endif()
|
||||
add_sources_group(opus celt ${celt_sources_x86_rtcd})
|
||||
add_sources_group(opus silk ${silk_sources_x86_rtcd})
|
||||
add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(SSE1_SUPPORTED)
|
||||
|
@ -427,7 +446,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
|||
if(SSE2_SUPPORTED)
|
||||
if(OPUS_X86_MAY_HAVE_SSE2)
|
||||
add_sources_group(opus celt ${celt_sources_sse2})
|
||||
add_sources_group(opus lpcnet ${dnn_sources_sse2})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_sse2})
|
||||
endif()
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
|
||||
if(NOT MSVC)
|
||||
set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
|
||||
|
@ -445,7 +466,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
|||
if(OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
add_sources_group(opus celt ${celt_sources_sse4_1})
|
||||
add_sources_group(opus silk ${silk_sources_sse4_1})
|
||||
add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
|
||||
endif()
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
if(NOT MSVC)
|
||||
set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
|
||||
|
@ -471,7 +494,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
|||
add_sources_group(opus celt ${celt_sources_avx2})
|
||||
add_sources_group(opus silk ${silk_sources_avx2})
|
||||
add_sources_group(opus silk ${silk_sources_float_avx2})
|
||||
add_sources_group(opus lpcnet ${dnn_sources_avx2})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_avx2})
|
||||
endif()
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2)
|
||||
if(MSVC)
|
||||
set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2")
|
||||
|
@ -524,7 +549,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
|||
|
||||
add_sources_group(opus celt ${celt_sources_arm_neon_intr})
|
||||
add_sources_group(opus silk ${silk_sources_arm_neon_intr})
|
||||
add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
|
||||
endif()
|
||||
|
||||
# silk arm neon depends on main_Fix.h
|
||||
target_include_directories(opus PRIVATE silk/fixed)
|
||||
|
|
|
@ -25,6 +25,9 @@ endif
|
|||
if ENABLE_DRED
|
||||
LPCNET_SOURCES += $(DRED_SOURCES)
|
||||
endif
|
||||
if ENABLE_OSCE
|
||||
LPCNET_SOURCES += $(OSCE_SOURCES)
|
||||
endif
|
||||
|
||||
if FIXED_POINT
|
||||
SILK_SOURCES += $(SILK_SOURCES_FIXED)
|
||||
|
@ -132,6 +135,9 @@ endif
|
|||
if ENABLE_DRED
|
||||
LPCNET_HEAD += $(DRED_HEAD)
|
||||
endif
|
||||
if ENABLE_OSCE
|
||||
LPCNET_HEAD += $(OSCE_HEAD)
|
||||
endif
|
||||
|
||||
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES)
|
||||
libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
|
||||
|
|
|
@ -9,7 +9,7 @@ set -e
|
|||
srcdir=`dirname $0`
|
||||
test -n "$srcdir" && cd "$srcdir"
|
||||
|
||||
dnn/download_model.sh df63771
|
||||
dnn/download_model.sh 591c8ba
|
||||
|
||||
echo "Updating build configuration files, please wait...."
|
||||
|
||||
|
|
|
@ -42,8 +42,10 @@ get_opus_sources(CELT_SOURCES_ARM_NE10 celt_sources.mk celt_sources_arm_ne10)
|
|||
|
||||
get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers)
|
||||
get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)
|
||||
get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)
|
||||
get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources)
|
||||
get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)
|
||||
get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)
|
||||
get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd)
|
||||
get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)
|
||||
get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
|
||||
|
|
29
configure.ac
29
configure.ac
|
@ -175,10 +175,10 @@ AC_ARG_ENABLE([deep-plc],
|
|||
[AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,
|
||||
[enable_deep_plc=no])
|
||||
|
||||
AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"],[
|
||||
AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
|
||||
AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])
|
||||
])
|
||||
AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes"])
|
||||
AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
|
||||
|
||||
has_float_approx=no
|
||||
case "$host_cpu" in
|
||||
|
@ -904,6 +904,31 @@ AS_IF([test "$enable_dnn_debug_float" = "no"], [
|
|||
AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE([osce-training-data],
|
||||
AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
|
||||
[enable_osc_training_data=no]
|
||||
)
|
||||
|
||||
AS_IF([test "$enable_osce_training_data" = "yes"], [
|
||||
AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
|
||||
])
|
||||
|
||||
AC_MSG_CHECKING([argument osce training data])
|
||||
AS_IF([test "$enable_osce_training_data" = "yes"], [
|
||||
AC_MSG_RESULT([yes])
|
||||
], [AC_MSG_RESULT([no])])
|
||||
|
||||
AC_ARG_ENABLE([osce],
|
||||
AS_HELP_STRING([--enable-osce], [enables feature output for SILK enhancement]),,
|
||||
[enable_osce=no]
|
||||
)
|
||||
|
||||
AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
|
||||
AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
|
||||
])
|
||||
|
||||
AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
|
||||
|
||||
AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])
|
||||
|
||||
AC_ARG_ENABLE([extra-programs],
|
||||
|
|
449
dnn/adaconvtest.c
Normal file
449
dnn/adaconvtest.c
Normal file
|
@ -0,0 +1,449 @@
|
|||
#include "lace_data.h"
|
||||
#include "nolace_data.h"
|
||||
#include "osce.h"
|
||||
#include "nndsp.h"
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
extern const WeightArray lacelayers_arrays[];
|
||||
extern const WeightArray nolacelayers_arrays[];
|
||||
|
||||
void adaconv_compare(
|
||||
const char * prefix,
|
||||
int num_frames,
|
||||
AdaConvState* hAdaConv,
|
||||
LinearLayer *kernel_layer,
|
||||
LinearLayer *gain_layer,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int in_channels,
|
||||
int out_channels,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float shape_gain
|
||||
)
|
||||
{
|
||||
char feature_file[256];
|
||||
char x_in_file[256];
|
||||
char x_out_file[256];
|
||||
char message[512];
|
||||
int i_frame, i_sample;
|
||||
float mse;
|
||||
float features[512];
|
||||
float x_in[512];
|
||||
float x_out_ref[512];
|
||||
float x_out[512];
|
||||
float window[40];
|
||||
|
||||
init_adaconv_state(hAdaConv);
|
||||
compute_overlap_window(window, 40);
|
||||
|
||||
FILE *f_features, *f_x_in, *f_x_out;
|
||||
|
||||
strcpy(feature_file, prefix);
|
||||
strcat(feature_file, "_features.f32");
|
||||
f_features = fopen(feature_file, "r");
|
||||
if (f_features == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", feature_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_in_file, prefix);
|
||||
strcat(x_in_file, "_x_in.f32");
|
||||
f_x_in = fopen(x_in_file, "r");
|
||||
if (f_x_in == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_in_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_out_file, prefix);
|
||||
strcat(x_out_file, "_x_out.f32");
|
||||
f_x_out = fopen(x_out_file, "r");
|
||||
if (f_x_out == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_out_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i_frame = 0; i_frame < num_frames; i_frame ++)
|
||||
{
|
||||
if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_in, sizeof(float), frame_size * in_channels, f_x_in) != frame_size * in_channels)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_out_ref, sizeof(float), frame_size * out_channels, f_x_out) != frame_size * out_channels)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim,
|
||||
frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding,
|
||||
filter_gain_a, filter_gain_b, shape_gain, window, 0);
|
||||
|
||||
mse = 0;
|
||||
for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++)
|
||||
{
|
||||
mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
|
||||
}
|
||||
mse = sqrt(mse / (frame_size * out_channels));
|
||||
printf("rmse[%d] %f\n", i_frame, mse);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void adacomb_compare(
|
||||
const char * prefix,
|
||||
int num_frames,
|
||||
AdaCombState* hAdaComb,
|
||||
LinearLayer *kernel_layer,
|
||||
LinearLayer *gain_layer,
|
||||
LinearLayer *global_gain_layer,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float log_gain_limit
|
||||
)
|
||||
{
|
||||
char feature_file[256];
|
||||
char x_in_file[256];
|
||||
char p_in_file[256];
|
||||
char x_out_file[256];
|
||||
char message[512];
|
||||
int i_frame, i_sample;
|
||||
float mse;
|
||||
float features[512];
|
||||
float x_in[512];
|
||||
float x_out_ref[512];
|
||||
float x_out[512];
|
||||
int pitch_lag;
|
||||
float window[40];
|
||||
|
||||
init_adacomb_state(hAdaComb);
|
||||
compute_overlap_window(window, 40);
|
||||
|
||||
FILE *f_features, *f_x_in, *f_p_in, *f_x_out;
|
||||
|
||||
strcpy(feature_file, prefix);
|
||||
strcat(feature_file, "_features.f32");
|
||||
f_features = fopen(feature_file, "r");
|
||||
if (f_features == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", feature_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_in_file, prefix);
|
||||
strcat(x_in_file, "_x_in.f32");
|
||||
f_x_in = fopen(x_in_file, "r");
|
||||
if (f_x_in == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_in_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(p_in_file, prefix);
|
||||
strcat(p_in_file, "_p_in.s32");
|
||||
f_p_in = fopen(p_in_file, "r");
|
||||
if (f_p_in == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", p_in_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_out_file, prefix);
|
||||
strcat(x_out_file, "_x_out.f32");
|
||||
f_x_out = fopen(x_out_file, "r");
|
||||
if (f_x_out == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_out_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i_frame = 0; i_frame < num_frames; i_frame ++)
|
||||
{
|
||||
if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(&pitch_lag, sizeof(int), 1, f_p_in) != 1)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, p_in_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer,
|
||||
pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0);
|
||||
|
||||
|
||||
mse = 0;
|
||||
for (i_sample = 0; i_sample < frame_size; i_sample ++)
|
||||
{
|
||||
mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
|
||||
}
|
||||
mse = sqrt(mse / (frame_size));
|
||||
printf("rmse[%d] %f\n", i_frame, mse);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void adashape_compare(
|
||||
const char * prefix,
|
||||
int num_frames,
|
||||
AdaShapeState* hAdaShape,
|
||||
LinearLayer *alpha1,
|
||||
LinearLayer *alpha2,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int avg_pool_k
|
||||
)
|
||||
{
|
||||
char feature_file[256];
|
||||
char x_in_file[256];
|
||||
char x_out_file[256];
|
||||
char message[512];
|
||||
int i_frame, i_sample;
|
||||
float mse;
|
||||
float features[512];
|
||||
float x_in[512];
|
||||
float x_out_ref[512];
|
||||
float x_out[512];
|
||||
|
||||
init_adashape_state(hAdaShape);
|
||||
|
||||
FILE *f_features, *f_x_in, *f_x_out;
|
||||
|
||||
strcpy(feature_file, prefix);
|
||||
strcat(feature_file, "_features.f32");
|
||||
f_features = fopen(feature_file, "r");
|
||||
if (f_features == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", feature_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_in_file, prefix);
|
||||
strcat(x_in_file, "_x_in.f32");
|
||||
f_x_in = fopen(x_in_file, "r");
|
||||
if (f_x_in == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_in_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_out_file, prefix);
|
||||
strcat(x_out_file, "_x_out.f32");
|
||||
f_x_out = fopen(x_out_file, "r");
|
||||
if (f_x_out == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_out_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i_frame = 0; i_frame < num_frames; i_frame ++)
|
||||
{
|
||||
if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim,
|
||||
frame_size, avg_pool_k, 0);
|
||||
|
||||
mse = 0;
|
||||
for (i_sample = 0; i_sample < frame_size; i_sample ++)
|
||||
{
|
||||
mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
|
||||
}
|
||||
mse = sqrt(mse / (frame_size));
|
||||
printf("rmse[%d] %f\n", i_frame, mse);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
LACELayers hLACE;
|
||||
NOLACELayers hNoLACE;
|
||||
|
||||
AdaConvState hAdaConv;
|
||||
AdaCombState hAdaComb;
|
||||
AdaShapeState hAdaShape;
|
||||
|
||||
init_adaconv_state(&hAdaConv);
|
||||
|
||||
init_lacelayers(&hLACE, lacelayers_arrays);
|
||||
init_nolacelayers(&hNoLACE, nolacelayers_arrays);
|
||||
|
||||
printf("\ntesting lace.af1 (1 in, 1 out)...\n");
|
||||
adaconv_compare(
|
||||
"testvectors/lace_af1",
|
||||
5,
|
||||
&hAdaConv,
|
||||
&hLACE.lace_af1_kernel,
|
||||
&hLACE.lace_af1_gain,
|
||||
LACE_AF1_FEATURE_DIM,
|
||||
LACE_AF1_FRAME_SIZE,
|
||||
LACE_AF1_OVERLAP_SIZE,
|
||||
LACE_AF1_IN_CHANNELS,
|
||||
LACE_AF1_OUT_CHANNELS,
|
||||
LACE_AF1_KERNEL_SIZE,
|
||||
LACE_AF1_LEFT_PADDING,
|
||||
LACE_AF1_FILTER_GAIN_A,
|
||||
LACE_AF1_FILTER_GAIN_B,
|
||||
LACE_AF1_SHAPE_GAIN
|
||||
);
|
||||
|
||||
|
||||
printf("\ntesting nolace.af1 (1 in, 2 out)...\n");
|
||||
adaconv_compare(
|
||||
"testvectors/nolace_af1",
|
||||
5,
|
||||
&hAdaConv,
|
||||
&hNoLACE.nolace_af1_kernel,
|
||||
&hNoLACE.nolace_af1_gain,
|
||||
NOLACE_AF1_FEATURE_DIM,
|
||||
NOLACE_AF1_FRAME_SIZE,
|
||||
NOLACE_AF1_OVERLAP_SIZE,
|
||||
NOLACE_AF1_IN_CHANNELS,
|
||||
NOLACE_AF1_OUT_CHANNELS,
|
||||
NOLACE_AF1_KERNEL_SIZE,
|
||||
NOLACE_AF1_LEFT_PADDING,
|
||||
NOLACE_AF1_FILTER_GAIN_A,
|
||||
NOLACE_AF1_FILTER_GAIN_B,
|
||||
NOLACE_AF1_SHAPE_GAIN
|
||||
);
|
||||
|
||||
|
||||
printf("testing nolace.af4 (2 in, 1 out)...\n");
|
||||
adaconv_compare(
|
||||
"testvectors/nolace_af4",
|
||||
5,
|
||||
&hAdaConv,
|
||||
&hNoLACE.nolace_af4_kernel,
|
||||
&hNoLACE.nolace_af4_gain,
|
||||
NOLACE_AF4_FEATURE_DIM,
|
||||
NOLACE_AF4_FRAME_SIZE,
|
||||
NOLACE_AF4_OVERLAP_SIZE,
|
||||
NOLACE_AF4_IN_CHANNELS,
|
||||
NOLACE_AF4_OUT_CHANNELS,
|
||||
NOLACE_AF4_KERNEL_SIZE,
|
||||
NOLACE_AF4_LEFT_PADDING,
|
||||
NOLACE_AF4_FILTER_GAIN_A,
|
||||
NOLACE_AF4_FILTER_GAIN_B,
|
||||
NOLACE_AF4_SHAPE_GAIN
|
||||
);
|
||||
|
||||
printf("\ntesting nolace.af2 (2 in, 2 out)...\n");
|
||||
adaconv_compare(
|
||||
"testvectors/nolace_af2",
|
||||
5,
|
||||
&hAdaConv,
|
||||
&hNoLACE.nolace_af2_kernel,
|
||||
&hNoLACE.nolace_af2_gain,
|
||||
NOLACE_AF2_FEATURE_DIM,
|
||||
NOLACE_AF2_FRAME_SIZE,
|
||||
NOLACE_AF2_OVERLAP_SIZE,
|
||||
NOLACE_AF2_IN_CHANNELS,
|
||||
NOLACE_AF2_OUT_CHANNELS,
|
||||
NOLACE_AF2_KERNEL_SIZE,
|
||||
NOLACE_AF2_LEFT_PADDING,
|
||||
NOLACE_AF2_FILTER_GAIN_A,
|
||||
NOLACE_AF2_FILTER_GAIN_B,
|
||||
NOLACE_AF2_SHAPE_GAIN
|
||||
);
|
||||
|
||||
printf("\ntesting lace.cf1...\n");
|
||||
adacomb_compare(
|
||||
"testvectors/lace_cf1",
|
||||
5,
|
||||
&hAdaComb,
|
||||
&hLACE.lace_cf1_kernel,
|
||||
&hLACE.lace_cf1_gain,
|
||||
&hLACE.lace_cf1_global_gain,
|
||||
LACE_CF1_FEATURE_DIM,
|
||||
LACE_CF1_FRAME_SIZE,
|
||||
LACE_CF1_OVERLAP_SIZE,
|
||||
LACE_CF1_KERNEL_SIZE,
|
||||
LACE_CF1_LEFT_PADDING,
|
||||
LACE_CF1_FILTER_GAIN_A,
|
||||
LACE_CF1_FILTER_GAIN_B,
|
||||
LACE_CF1_LOG_GAIN_LIMIT
|
||||
);
|
||||
|
||||
printf("\ntesting nolace.tdshape1...\n");
|
||||
adashape_compare(
|
||||
"testvectors/nolace_tdshape1",
|
||||
5,
|
||||
&hAdaShape,
|
||||
&hNoLACE.nolace_tdshape1_alpha1,
|
||||
&hNoLACE.nolace_tdshape1_alpha2,
|
||||
NOLACE_TDSHAPE1_FEATURE_DIM,
|
||||
NOLACE_TDSHAPE1_FRAME_SIZE,
|
||||
NOLACE_TDSHAPE1_AVG_POOL_K
|
||||
);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* gcc -DVAR_ARRAYS -DENABLE_OSCE -I ../include -I ../silk -I . -I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */
|
|
@ -5,6 +5,11 @@ if opt_enable_dred
|
|||
dnn_sources += dred_sources
|
||||
endif
|
||||
|
||||
osce_sources = sources['OSCE_SOURCES']
|
||||
if opt_enable_osce
|
||||
dnn_sources += osce_sources
|
||||
endif
|
||||
|
||||
dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']
|
||||
dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']
|
||||
dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']
|
||||
|
|
412
dnn/nndsp.c
Normal file
412
dnn/nndsp.c
Normal file
|
@ -0,0 +1,412 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "nndsp.h"
|
||||
#include "arch.h"
|
||||
#include "nnet.h"
|
||||
#include "os_support.h"
|
||||
#include "pitch.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.141592653589793f
|
||||
#endif
|
||||
|
||||
#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
|
||||
|
||||
void init_adaconv_state(AdaConvState *hAdaConv)
|
||||
{
|
||||
OPUS_CLEAR(hAdaConv, 1);
|
||||
}
|
||||
|
||||
void init_adacomb_state(AdaCombState *hAdaComb)
|
||||
{
|
||||
OPUS_CLEAR(hAdaComb, 1);
|
||||
}
|
||||
|
||||
void init_adashape_state(AdaShapeState *hAdaShape)
|
||||
{
|
||||
OPUS_CLEAR(hAdaShape, 1);
|
||||
}
|
||||
|
||||
void compute_overlap_window(float *window, int overlap_size)
|
||||
{
|
||||
int i_sample;
|
||||
for (i_sample=0; i_sample < overlap_size; i_sample++)
|
||||
{
|
||||
window[i_sample] = 0.5f + 0.5f * cos(M_PI * (i_sample + 0.5f) / overlap_size);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
void print_float_vector(const char* name, const float *vec, int length)
|
||||
{
|
||||
for (int i = 0; i < length; i ++)
|
||||
{
|
||||
printf("%s[%d]: %f\n", name, i, vec[i]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void scale_kernel(
|
||||
float *kernel,
|
||||
int in_channels,
|
||||
int out_channels,
|
||||
int kernel_size,
|
||||
float *gain
|
||||
)
|
||||
/* normalizes (p-norm) kernel over input channel and kernel dimension */
|
||||
{
|
||||
float norm;
|
||||
int i_in_channels, i_out_channels, i_kernel;
|
||||
|
||||
for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
|
||||
{
|
||||
norm = 0;
|
||||
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels ++)
|
||||
{
|
||||
for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
|
||||
{
|
||||
norm += kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] * kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)];
|
||||
}
|
||||
}
|
||||
#ifdef DEBUG_NNDSP
|
||||
printf("kernel norm: %f, %f\n", norm, sqrt(norm));
|
||||
#endif
|
||||
norm = 1.f / (1e-6f + sqrt(norm));
|
||||
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
|
||||
{
|
||||
for (i_kernel = 0; i_kernel < kernel_size; i_kernel++)
|
||||
{
|
||||
|
||||
kernel[KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel)] *= norm * gain[i_out_channels];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void transform_gains(
|
||||
float *gains,
|
||||
int num_gains,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b
|
||||
)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < num_gains; i++)
|
||||
{
|
||||
gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b);
|
||||
}
|
||||
}
|
||||
|
||||
void adaconv_process_frame(
|
||||
AdaConvState* hAdaConv,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *kernel_layer,
|
||||
const LinearLayer *gain_layer,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int in_channels,
|
||||
int out_channels,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float shape_gain,
|
||||
float *window,
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
|
||||
float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
|
||||
float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
|
||||
float kernel0[ADACONV_MAX_KERNEL_SIZE];
|
||||
float kernel1[ADACONV_MAX_KERNEL_SIZE];
|
||||
float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
|
||||
float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
|
||||
float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
|
||||
float *p_input;
|
||||
int i_in_channels, i_out_channels, i_sample;
|
||||
|
||||
(void) feature_dim; /* ToDo: figure out whether we might need this information */
|
||||
|
||||
celt_assert(shape_gain == 1);
|
||||
celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
|
||||
celt_assert(kernel_size < frame_size);
|
||||
|
||||
OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
|
||||
OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
|
||||
OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("x_in", x_in, in_channels * frame_size);
|
||||
#endif
|
||||
|
||||
/* prepare input */
|
||||
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
|
||||
{
|
||||
OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
|
||||
OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
|
||||
}
|
||||
p_input = input_buffer + kernel_size;
|
||||
|
||||
|
||||
/* calculate new kernel and new gain */
|
||||
compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
|
||||
compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("features", features, feature_dim);
|
||||
print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
|
||||
print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
|
||||
#endif
|
||||
transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
|
||||
scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
|
||||
print_float_vector("adaconv_gain", gain_buffer, out_channels);
|
||||
#endif
|
||||
|
||||
/* calculate overlapping part using kernel from last frame */
|
||||
|
||||
for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
|
||||
{
|
||||
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
|
||||
{
|
||||
OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
|
||||
OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);
|
||||
|
||||
OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
|
||||
OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
|
||||
celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
|
||||
celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
|
||||
for (i_sample = 0; i_sample < overlap_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample];
|
||||
output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
|
||||
}
|
||||
for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OPUS_COPY(x_out, output_buffer, out_channels * frame_size);
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("x_out", x_out, out_channels * frame_size);
|
||||
#endif
|
||||
|
||||
/* buffer update */
|
||||
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
|
||||
{
|
||||
OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
|
||||
}
|
||||
OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
|
||||
}
|
||||
|
||||
void adacomb_process_frame(
|
||||
AdaCombState* hAdaComb,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *kernel_layer,
|
||||
const LinearLayer *gain_layer,
|
||||
const LinearLayer *global_gain_layer,
|
||||
int pitch_lag,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float log_gain_limit,
|
||||
float *window,
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float output_buffer[ADACOMB_MAX_FRAME_SIZE];
|
||||
float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
|
||||
float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
|
||||
float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
|
||||
float gain, global_gain;
|
||||
float *p_input;
|
||||
int i_sample;
|
||||
float kernel[16];
|
||||
float last_kernel[16];
|
||||
|
||||
(void) feature_dim; /* ToDo: figure out whether we might need this information */
|
||||
|
||||
OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
|
||||
OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
|
||||
OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);
|
||||
|
||||
OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
|
||||
OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
|
||||
p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;
|
||||
|
||||
/* calculate new kernel and new gain */
|
||||
compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
|
||||
compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
|
||||
compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("features", features, feature_dim);
|
||||
print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
|
||||
print_float_vector("adacomb_gain_raw", &gain, 1);
|
||||
print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
|
||||
#endif
|
||||
gain = exp(log_gain_limit - gain);
|
||||
global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
|
||||
scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
|
||||
print_float_vector("adacomb_gain", &gain, 1);
|
||||
#endif
|
||||
|
||||
OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
|
||||
OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
|
||||
OPUS_COPY(kernel, kernel_buffer, kernel_size);
|
||||
OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);
|
||||
|
||||
celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
|
||||
|
||||
celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);
|
||||
for (i_sample = 0; i_sample < overlap_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
|
||||
}
|
||||
|
||||
for (i_sample = 0; i_sample < overlap_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
|
||||
}
|
||||
|
||||
for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
|
||||
{
|
||||
output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
|
||||
}
|
||||
OPUS_COPY(x_out, output_buffer, frame_size);
|
||||
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("x_out", x_out, frame_size);
|
||||
#endif
|
||||
|
||||
/* buffer update */
|
||||
OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
|
||||
OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
|
||||
hAdaComb->last_pitch_lag = pitch_lag;
|
||||
hAdaComb->last_global_gain = global_gain;
|
||||
}
|
||||
|
||||
|
||||
void adashape_process_frame(
|
||||
AdaShapeState *hAdaShape,
|
||||
float *x_out,
|
||||
const float *x_in,
|
||||
const float *features,
|
||||
const LinearLayer *alpha1,
|
||||
const LinearLayer *alpha2,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int avg_pool_k,
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
|
||||
float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
|
||||
int i, k;
|
||||
int tenv_size;
|
||||
float mean;
|
||||
float *tenv;
|
||||
|
||||
celt_assert(frame_size % avg_pool_k == 0);
|
||||
celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);
|
||||
|
||||
tenv_size = frame_size / avg_pool_k;
|
||||
tenv = in_buffer + feature_dim;
|
||||
OPUS_CLEAR(tenv, tenv_size + 1);
|
||||
|
||||
OPUS_COPY(in_buffer, features, feature_dim);
|
||||
|
||||
/* calculate temporal envelope */
|
||||
mean = 0;
|
||||
for (i = 0; i < tenv_size; i++)
|
||||
{
|
||||
for (k = 0; k < avg_pool_k; k++)
|
||||
{
|
||||
tenv[i] += fabs(x_in[i * avg_pool_k + k]);
|
||||
}
|
||||
tenv[i] = log(tenv[i] / avg_pool_k + 1.52587890625e-05f);
|
||||
mean += tenv[i];
|
||||
}
|
||||
mean /= tenv_size;
|
||||
for (i = 0; i < tenv_size; i++)
|
||||
{
|
||||
tenv[i] -= mean;
|
||||
}
|
||||
tenv[tenv_size] = mean;
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("tenv", tenv, tenv_size + 1);
|
||||
#endif
|
||||
|
||||
/* calculate temporal weights */
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
|
||||
#endif
|
||||
compute_generic_conv1d(alpha1, out_buffer, hAdaShape->conv_alpha1_state, in_buffer, feature_dim + tenv_size + 1, ACTIVATION_LINEAR, arch);
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("alpha1_out", out_buffer, frame_size);
|
||||
#endif
|
||||
/* compute leaky ReLU by hand. ToDo: try tanh activation */
|
||||
for (i = 0; i < frame_size; i ++)
|
||||
{
|
||||
in_buffer[i] = out_buffer[i] >= 0 ? out_buffer[i] : 0.2f * out_buffer[i];
|
||||
}
|
||||
#ifdef DEBUG_NNDSP
|
||||
print_float_vector("post_alpha1", in_buffer, frame_size);
|
||||
#endif
|
||||
compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);
|
||||
|
||||
/* shape signal */
|
||||
for (i = 0; i < frame_size; i ++)
|
||||
{
|
||||
x_out[i] = exp(out_buffer[i]) * x_in[i];
|
||||
}
|
||||
|
||||
}
|
141
dnn/nndsp.h
Normal file
141
dnn/nndsp.h
Normal file
|
@ -0,0 +1,141 @@
|
|||
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Neural DSP building blocks (adaptive convolution, adaptive comb filtering
   and adaptive temporal shaping) used by the OSCE enhancement models. */

#ifndef NNDSP_H
#define NNDSP_H

#include "opus_types.h"
#include "nnet.h"
#include <string.h>


/* compile-time limits for the adaptive convolution module */
#define ADACONV_MAX_KERNEL_SIZE 16
#define ADACONV_MAX_INPUT_CHANNELS 2
#define ADACONV_MAX_OUTPUT_CHANNELS 2
#define ADACONV_MAX_FRAME_SIZE 80
#define ADACONV_MAX_OVERLAP_SIZE 40

/* compile-time limits for the adaptive comb-filtering module */
#define ADACOMB_MAX_LAG 300
#define ADACOMB_MAX_KERNEL_SIZE 16
#define ADACOMB_MAX_FRAME_SIZE 80
#define ADACOMB_MAX_OVERLAP_SIZE 40

/* compile-time limits for the adaptive shaping module */
#define ADASHAPE_MAX_INPUT_DIM 512
#define ADASHAPE_MAX_FRAME_SIZE 160

/* uncomment to dump intermediate vectors from the processing routines */
/*#define DEBUG_NNDSP*/
#ifdef DEBUG_NNDSP
#include <stdio.h>
#endif


/* debug helper: prints a labelled float vector */
void print_float_vector(const char* name, const float *vec, int length);

/* State for adaconv_process_frame(): per-channel input history plus the
   previous frame's kernel, kept for cross-fading at frame boundaries. */
typedef struct {
    float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS];
    float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
    float last_gain;
} AdaConvState;


/* State for adacomb_process_frame(): input history (kernel + max lag), plus
   the previous frame's kernel, global gain and pitch lag for cross-fading. */
typedef struct {
    float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG];
    float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
    float last_global_gain;
    int last_pitch_lag;
} AdaCombState;


/* State for adashape_process_frame(): the two conv layer states. */
typedef struct {
    float conv_alpha1_state[ADASHAPE_MAX_INPUT_DIM];
    float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE];
} AdaShapeState;

/* state initializers (zero the respective structs) */
void init_adaconv_state(AdaConvState *hAdaConv);

void init_adacomb_state(AdaCombState *hAdaComb);

void init_adashape_state(AdaShapeState *hAdaShape);

/* fills `window` with an overlap-add cross-fade window of the given length */
void compute_overlap_window(float *window, int overlap_size);

/* Feature-conditioned multichannel convolution over one frame; kernel and
   gain are predicted per frame from `features`. */
void adaconv_process_frame(
    AdaConvState* hAdaConv,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    int feature_dim, /* not strictly necessary */
    int frame_size,
    int overlap_size,
    int in_channels,
    int out_channels,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float shape_gain,
    float *window,
    int arch
);

/* Feature-conditioned comb filtering at `pitch_lag` over one frame. */
void adacomb_process_frame(
    AdaCombState* hAdaComb,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    const LinearLayer *global_gain_layer,
    int pitch_lag,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float log_gain_limit,
    float *window,
    int arch
);

/* Feature-conditioned temporal shaping (per-sample gain) over one frame. */
void adashape_process_frame(
    AdaShapeState *hAdaShape,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *alpha1,
    const LinearLayer *alpha2,
    int feature_dim,
    int frame_size,
    int avg_pool_k,
    int arch
);

#endif
|
|
@ -41,6 +41,10 @@
|
|||
#include "os_support.h"
|
||||
#include "vec.h"
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "osce_config.h"
|
||||
#endif
|
||||
|
||||
#ifdef NO_OPTIMIZATIONS
|
||||
#if defined(_MSC_VER)
|
||||
#pragma message ("Compiling without any vectorization. This code will be very slow")
|
||||
|
@ -59,8 +63,11 @@ void compute_generic_dense(const LinearLayer *layer, float *output, const float
|
|||
compute_activation(output, output, layer->nb_outputs, activation, arch);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
|
||||
#else
|
||||
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS)
|
||||
|
||||
#endif
|
||||
|
||||
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
|
||||
{
|
||||
|
|
|
@ -64,13 +64,29 @@ static OPUS_INLINE float relu(float x)
|
|||
return x < 0 ? 0 : x;
|
||||
}
|
||||
|
||||
/*#define HIGH_ACCURACY */
|
||||
|
||||
void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)
|
||||
{
|
||||
int i;
|
||||
if (activation == ACTIVATION_SIGMOID) {
|
||||
#ifdef HIGH_ACCURACY
|
||||
for (int n=0; n<N; n++)
|
||||
{
|
||||
output[n] = 1.f / (1 + exp(-input[n]));
|
||||
}
|
||||
#else
|
||||
vec_sigmoid(output, input, N);
|
||||
#endif
|
||||
} else if (activation == ACTIVATION_TANH) {
|
||||
#ifdef HIGH_ACCURACY
|
||||
for (int n=0; n<N; n++)
|
||||
{
|
||||
output[n] = tanh(input[n]);
|
||||
}
|
||||
#else
|
||||
vec_tanh(output, input, N);
|
||||
#endif
|
||||
} else if (activation == ACTIVATION_SWISH) {
|
||||
vec_swish(output, input, N);
|
||||
} else if (activation == ACTIVATION_RELU) {
|
||||
|
|
1411
dnn/osce.c
Normal file
1411
dnn/osce.c
Normal file
File diff suppressed because it is too large
Load diff
81
dnn/osce.h
Normal file
81
dnn/osce.h
Normal file
|
@ -0,0 +1,81 @@
|
|||
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Public interface of the OSCE (Opus Speech Coding Enhancement) framework,
   covering the LACE and NoLACE enhancement methods. */

#ifndef OSCE_H
#define OSCE_H


#include "opus_types.h"
/*#include "osce_config.h"*/
#ifndef DISABLE_LACE
#include "lace_data.h"
#endif
#ifndef DISABLE_NOLACE
#include "nolace_data.h"
#endif
#include "nndsp.h"
#include "nnet.h"
#include "osce_structs.h"
#include "structs.h"

/* enhancement method identifiers; LACE/NOLACE only exist when compiled in */
#define OSCE_METHOD_NONE 0
#ifndef DISABLE_LACE
#define OSCE_METHOD_LACE 1
#endif
#ifndef DISABLE_NOLACE
#define OSCE_METHOD_NOLACE 2
#endif

/* default method: NoLACE if available, otherwise LACE, otherwise none */
#if !defined(DISABLE_NOLACE)
#define OSCE_DEFAULT_METHOD OSCE_METHOD_NOLACE
#elif !defined(DISABLE_LACE)
#define OSCE_DEFAULT_METHOD OSCE_METHOD_LACE
#else
#define OSCE_DEFAULT_METHOD OSCE_METHOD_NONE
#endif




/* API */

/* Enhances one decoded SILK frame in place. */
void osce_enhance_frame(
    OSCEModel *model,                   /* I    OSCE model struct                     */
    silk_decoder_state *psDec,          /* I/O  Decoder state                         */
    silk_decoder_control *psDecCtrl,    /* I    Decoder control                       */
    opus_int16 xq[],                    /* I/O  Decoded speech                        */
    opus_int32 num_bits,                /* I    Size of SILK payload in bits          */
    int arch                            /* I    Run-time architecture                 */
);


/* Loads model weights from a binary blob; returns a status code. */
int osce_load_models(OSCEModel *hModel, const unsigned char *data, int len);
/* Resets the OSCE state for the given method. */
void osce_reset(silk_OSCE_struct *hOSCE, int method);


#endif
|
62
dnn/osce_config.h
Normal file
62
dnn/osce_config.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
/* Copyright (c) 2023 Amazon
   Written by Jan Buethe */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Shared configuration constants for OSCE feature calculation and models. */

#ifndef OSCE_CONFIG
#define OSCE_CONFIG

/* upper bound on OSCE recurrent layer size (used for scratch sizing) */
#define OSCE_MAX_RNN_NEURONS 256

/* feature extraction buffer sizes */
#define OSCE_FEATURES_MAX_HISTORY 350
#define OSCE_FEATURE_DIM 93
#define OSCE_MAX_FEATURE_FRAMES 4

/* filterbank resolutions for clean and noisy spectra */
#define OSCE_CLEAN_SPEC_NUM_BANDS 64
#define OSCE_NOISY_SPEC_NUM_BANDS 18

/* sentinel used when no pitch is available */
#define OSCE_NO_PITCH_VALUE 7

/* pre-emphasis coefficient */
#define OSCE_PREEMPH 0.85f

/* frames to keep the pitch feature alive after voicing ends */
#define OSCE_PITCH_HANGOVER 8

/* Layout of the 93-dimensional feature vector (START + LENGTH fields):
   [0,64) clean spectrum, [64,82) noisy cepstrum, [82,87) autocorrelation,
   [87,92) LTP, [92] log gain; totals OSCE_FEATURE_DIM. */
#define OSCE_CLEAN_SPEC_START 0
#define OSCE_CLEAN_SPEC_LENGTH 64

#define OSCE_NOISY_CEPSTRUM_START 64
#define OSCE_NOISY_CEPSTRUM_LENGTH 18

#define OSCE_ACORR_START 82
#define OSCE_ACORR_LENGTH 5

#define OSCE_LTP_START 87
#define OSCE_LTP_LENGTH 5

#define OSCE_LOG_GAIN_START 92
#define OSCE_LOG_GAIN_LENGTH 1


#endif
|
454
dnn/osce_features.c
Normal file
454
dnn/osce_features.c
Normal file
|
@ -0,0 +1,454 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

/* spectral analysis setup: 320-sample window, one-sided spectrum */
#define OSCE_SPEC_WINDOW_SIZE 320
#define OSCE_SPEC_NUM_FREQS 161


/*DEBUG*/
/*#define WRITE_FEATURES*/
/* fix: was "DEBUG_PRING" — the debug blocks below are guarded by
   #ifdef DEBUG_PRINT, so the misspelled name could never enable them */
/*#define DEBUG_PRINT*/
/*******/

#include "stack_alloc.h"
#include "osce_features.h"
#include "kiss_fft.h"
#include "os_support.h"
#include "osce.h"
#include "freq.h"


/* fix: guard must test DEBUG_PRINT (not DEBUG_PRING), otherwise enabling
   DEBUG_PRINT leaves the printf calls without <stdio.h> */
#if defined(WRITE_FEATURES) || defined(DEBUG_PRINT)
#include <stdio.h>
#include <stdlib.h>
#endif
|
||||
|
||||
/* Center bins (indices into the 161-point one-sided spectrum) of the 64
   bands of the clean-spectrum filterbank; consumed by apply_filterbank(). */
static const int center_bins_clean[64] = {
      0,   2,   5,   8,  10,  12,  15,  18,
     20,  22,  25,  28,  30,  33,  35,  38,
     40,  42,  45,  48,  50,  52,  55,  58,
     60,  62,  65,  68,  70,  73,  75,  78,
     80,  82,  85,  88,  90,  92,  95,  98,
    100, 102, 105, 108, 110, 112, 115, 118,
    120, 122, 125, 128, 130, 132, 135, 138,
    140, 142, 145, 148, 150, 152, 155, 160
};
|
||||
|
||||
/* Center bins of the 18 coarser bands used for the noisy-signal spectrum;
   consumed by apply_filterbank(). */
static const int center_bins_noisy[18] = {
      0,   4,   8,  12,  16,  20,  24,  28,
     32,  40,  48,  56,  64,  80,  96, 112,
    136, 160
};
|
||||
|
||||
/* Per-band normalization weights for the 64-band clean-spectrum filterbank,
   applied in apply_filterbank(); one weight per entry of center_bins_clean. */
static const float band_weights_clean[64] = {
    0.666666666667f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.400000000000f, 0.400000000000f, 0.400000000000f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.333333333333f, 0.400000000000f,
    0.500000000000f, 0.400000000000f, 0.250000000000f, 0.333333333333f
};
|
||||
|
||||
/* Per-band normalization weights for the 18-band noisy-spectrum filterbank,
   applied in apply_filterbank(); one weight per entry of center_bins_noisy. */
static const float band_weights_noisy[18] = {
    0.400000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
    0.250000000000f, 0.250000000000f, 0.250000000000f, 0.250000000000f,
    0.166666666667f, 0.125000000000f, 0.125000000000f, 0.125000000000f,
    0.083333333333f, 0.062500000000f, 0.062500000000f, 0.050000000000f,
    0.041666666667f, 0.080000000000f
};
|
||||
|
||||
static float osce_window[OSCE_SPEC_WINDOW_SIZE] = {
|
||||
0.004908718808f, 0.014725683311f, 0.024541228523f, 0.034354408400f, 0.044164277127f,
|
||||
0.053969889210f, 0.063770299562f, 0.073564563600f, 0.083351737332f, 0.093130877450f,
|
||||
0.102901041421f, 0.112661287575f, 0.122410675199f, 0.132148264628f, 0.141873117332f,
|
||||
0.151584296010f, 0.161280864678f, 0.170961888760f, 0.180626435180f, 0.190273572448f,
|
||||
0.199902370753f, 0.209511902052f, 0.219101240157f, 0.228669460829f, 0.238215641862f,
|
||||
0.247738863176f, 0.257238206902f, 0.266712757475f, 0.276161601717f, 0.285583828929f,
|
||||
0.294978530977f, 0.304344802381f, 0.313681740399f, 0.322988445118f, 0.332264019538f,
|
||||
0.341507569661f, 0.350718204573f, 0.359895036535f, 0.369037181064f, 0.378143757022f,
|
||||
0.387213886697f, 0.396246695891f, 0.405241314005f, 0.414196874117f, 0.423112513073f,
|
||||
0.431987371563f, 0.440820594212f, 0.449611329655f, 0.458358730621f, 0.467061954019f,
|
||||
0.475720161014f, 0.484332517110f, 0.492898192230f, 0.501416360796f, 0.509886201809f,
|
||||
0.518306898929f, 0.526677640552f, 0.534997619887f, 0.543266035038f, 0.551482089078f,
|
||||
0.559644990127f, 0.567753951426f, 0.575808191418f, 0.583806933818f, 0.591749407690f,
|
||||
0.599634847523f, 0.607462493302f, 0.615231590581f, 0.622941390558f, 0.630591150148f,
|
||||
0.638180132051f, 0.645707604824f, 0.653172842954f, 0.660575126926f, 0.667913743292f,
|
||||
0.675187984742f, 0.682397150168f, 0.689540544737f, 0.696617479953f, 0.703627273726f,
|
||||
0.710569250438f, 0.717442741007f, 0.724247082951f, 0.730981620454f, 0.737645704427f,
|
||||
0.744238692572f, 0.750759949443f, 0.757208846506f, 0.763584762206f, 0.769887082016f,
|
||||
0.776115198508f, 0.782268511401f, 0.788346427627f, 0.794348361383f, 0.800273734191f,
|
||||
0.806121974951f, 0.811892519997f, 0.817584813152f, 0.823198305781f, 0.828732456844f,
|
||||
0.834186732948f, 0.839560608398f, 0.844853565250f, 0.850065093356f, 0.855194690420f,
|
||||
0.860241862039f, 0.865206121757f, 0.870086991109f, 0.874883999665f, 0.879596685080f,
|
||||
0.884224593137f, 0.888767277786f, 0.893224301196f, 0.897595233788f, 0.901879654283f,
|
||||
0.906077149740f, 0.910187315596f, 0.914209755704f, 0.918144082372f, 0.921989916403f,
|
||||
0.925746887127f, 0.929414632439f, 0.932992798835f, 0.936481041442f, 0.939879024058f,
|
||||
0.943186419177f, 0.946402908026f, 0.949528180593f, 0.952561935658f, 0.955503880820f,
|
||||
0.958353732530f, 0.961111216112f, 0.963776065795f, 0.966348024735f, 0.968826845041f,
|
||||
0.971212287799f, 0.973504123096f, 0.975702130039f, 0.977806096779f, 0.979815820533f,
|
||||
0.981731107599f, 0.983551773378f, 0.985277642389f, 0.986908548290f, 0.988444333892f,
|
||||
0.989884851171f, 0.991229961288f, 0.992479534599f, 0.993633450666f, 0.994691598273f,
|
||||
0.995653875433f, 0.996520189401f, 0.997290456679f, 0.997964603026f, 0.998542563469f,
|
||||
0.999024282300f, 0.999409713092f, 0.999698818696f, 0.999891571247f, 0.999987952167f,
|
||||
0.999987952167f, 0.999891571247f, 0.999698818696f, 0.999409713092f, 0.999024282300f,
|
||||
0.998542563469f, 0.997964603026f, 0.997290456679f, 0.996520189401f, 0.995653875433f,
|
||||
0.994691598273f, 0.993633450666f, 0.992479534599f, 0.991229961288f, 0.989884851171f,
|
||||
0.988444333892f, 0.986908548290f, 0.985277642389f, 0.983551773378f, 0.981731107599f,
|
||||
0.979815820533f, 0.977806096779f, 0.975702130039f, 0.973504123096f, 0.971212287799f,
|
||||
0.968826845041f, 0.966348024735f, 0.963776065795f, 0.961111216112f, 0.958353732530f,
|
||||
0.955503880820f, 0.952561935658f, 0.949528180593f, 0.946402908026f, 0.943186419177f,
|
||||
0.939879024058f, 0.936481041442f, 0.932992798835f, 0.929414632439f, 0.925746887127f,
|
||||
0.921989916403f, 0.918144082372f, 0.914209755704f, 0.910187315596f, 0.906077149740f,
|
||||
0.901879654283f, 0.897595233788f, 0.893224301196f, 0.888767277786f, 0.884224593137f,
|
||||
0.879596685080f, 0.874883999665f, 0.870086991109f, 0.865206121757f, 0.860241862039f,
|
||||
0.855194690420f, 0.850065093356f, 0.844853565250f, 0.839560608398f, 0.834186732948f,
|
||||
0.828732456844f, 0.823198305781f, 0.817584813152f, 0.811892519997f, 0.806121974951f,
|
||||
0.800273734191f, 0.794348361383f, 0.788346427627f, 0.782268511401f, 0.776115198508f,
|
||||
0.769887082016f, 0.763584762206f, 0.757208846506f, 0.750759949443f, 0.744238692572f,
|
||||
0.737645704427f, 0.730981620454f, 0.724247082951f, 0.717442741007f, 0.710569250438f,
|
||||
0.703627273726f, 0.696617479953f, 0.689540544737f, 0.682397150168f, 0.675187984742f,
|
||||
0.667913743292f, 0.660575126926f, 0.653172842954f, 0.645707604824f, 0.638180132051f,
|
||||
0.630591150148f, 0.622941390558f, 0.615231590581f, 0.607462493302f, 0.599634847523f,
|
||||
0.591749407690f, 0.583806933818f, 0.575808191418f, 0.567753951426f, 0.559644990127f,
|
||||
0.551482089078f, 0.543266035038f, 0.534997619887f, 0.526677640552f, 0.518306898929f,
|
||||
0.509886201809f, 0.501416360796f, 0.492898192230f, 0.484332517110f, 0.475720161014f,
|
||||
0.467061954019f, 0.458358730621f, 0.449611329655f, 0.440820594212f, 0.431987371563f,
|
||||
0.423112513073f, 0.414196874117f, 0.405241314005f, 0.396246695891f, 0.387213886697f,
|
||||
0.378143757022f, 0.369037181064f, 0.359895036535f, 0.350718204573f, 0.341507569661f,
|
||||
0.332264019538f, 0.322988445118f, 0.313681740399f, 0.304344802381f, 0.294978530977f,
|
||||
0.285583828929f, 0.276161601717f, 0.266712757475f, 0.257238206902f, 0.247738863176f,
|
||||
0.238215641862f, 0.228669460829f, 0.219101240157f, 0.209511902052f, 0.199902370753f,
|
||||
0.190273572448f, 0.180626435180f, 0.170961888760f, 0.161280864678f, 0.151584296010f,
|
||||
0.141873117332f, 0.132148264628f, 0.122410675199f, 0.112661287575f, 0.102901041421f,
|
||||
0.093130877450f, 0.083351737332f, 0.073564563600f, 0.063770299562f, 0.053969889210f,
|
||||
0.044164277127f, 0.034354408400f, 0.024541228523f, 0.014725683311f, 0.004908718808f
|
||||
};
|
||||
|
||||
/* Accumulates the linear bins of x_in into num_bands triangular bands.
   Bin i between center_bins[b] and center_bins[b+1] is split linearly
   between bands b and b+1; each contribution is scaled by band_weights[].
   The last center bin contributes fully to the last band.
   x_out must not alias x_in (in-place operation is not supported). */
static void apply_filterbank(float *x_out, float *x_in, const int *center_bins, const float* band_weights, int num_bands)
{
    int b, i;
    float frac;

    celt_assert(x_in != x_out); /* fix: statement was missing its terminating semicolon */

    x_out[0] = 0;
    for (b = 0; b < num_bands - 1; b++)
    {
        x_out[b+1] = 0;
        for (i = center_bins[b]; i < center_bins[b+1]; i++)
        {
            /* frac goes from 1 at the left center bin down towards 0 at the right one */
            frac = (float) (center_bins[b+1] - i) / (center_bins[b+1] - center_bins[b]);
            x_out[b] += band_weights[b] * frac * x_in[i];
            x_out[b+1] += band_weights[b+1] * (1 - frac) * x_in[i];
        }
    }
    /* the right-most center bin belongs entirely to the last band */
    x_out[num_bands - 1] += band_weights[num_bands - 1] * x_in[center_bins[num_bands - 1]];
#ifdef DEBUG_PRINT
    for (b = 0; b < num_bands; b++)
    {
        printf("band[%d]: %f\n", b, x_out[b]);
    }
#endif
}
|
||||
|
||||
|
||||
static void mag_spec_320_onesided(float *out, float *in)
|
||||
{
|
||||
celt_assert(OSCE_SPEC_WINDOW_SIZE == 320);
|
||||
kiss_fft_cpx buffer[OSCE_SPEC_WINDOW_SIZE];
|
||||
int k;
|
||||
forward_transform(buffer, in);
|
||||
|
||||
for (k = 0; k < OSCE_SPEC_NUM_FREQS; k++)
|
||||
{
|
||||
out[k] = OSCE_SPEC_WINDOW_SIZE * sqrt(buffer[k].r * buffer[k].r + buffer[k].i * buffer[k].i);
|
||||
#ifdef DEBUG_PRINT
|
||||
printf("magspec[%d]: %f\n", k, out[k]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Computes a band-wise log magnitude spectrum of the LPC synthesis filter
   1/A(z) from Q12 prediction coefficients a_q12[0..lpc_order-1].
   Output: OSCE_CLEAN_SPEC_NUM_BANDS values in spec, scaled by 0.3. */
static void calculate_log_spectrum_from_lpc(float *spec, opus_int16 *a_q12, int lpc_order)
{
    float buffer[OSCE_SPEC_WINDOW_SIZE] = {0};
    int i;

    /* zero expansion: build A(z) = 1 - sum_i a[i] z^-(i+1) as a zero-padded sequence */
    buffer[0] = 1;
    for (i = 0; i < lpc_order; i++)
    {
        buffer[i+1] = - (float)a_q12[i] / (1U << 12); /* Q12 -> float */
    }

    /* calculate and invert magnitude spectrum: |1/A| = 1/|A|
       (mag_spec_320_onesided supports in-place operation on buffer) */
    mag_spec_320_onesided(buffer, buffer);

    for (i = 0; i < OSCE_SPEC_NUM_FREQS; i++)
    {
        buffer[i] = 1.f / (buffer[i] + 1e-9f); /* 1e-9f guards against division by zero */
    }

    /* apply filterbank: reduce OSCE_SPEC_NUM_FREQS bins to clean-spec bands */
    apply_filterbank(spec, buffer, center_bins_clean, band_weights_clean, OSCE_CLEAN_SPEC_NUM_BANDS);

    /* log and scaling (0.3 presumably matches the training-time feature scale — confirm
       against the python feature pipeline) */
    for (i = 0; i < OSCE_CLEAN_SPEC_NUM_BANDS; i++)
    {
        spec[i] = 0.3f * log(spec[i] + 1e-9f);
    }
}
|
||||
|
||||
static void calculate_cepstrum(float *cepstrum, float *signal)
|
||||
{
|
||||
float buffer[OSCE_SPEC_WINDOW_SIZE];
|
||||
float *spec = &buffer[OSCE_SPEC_NUM_FREQS + 3];
|
||||
int n;
|
||||
|
||||
celt_assert(cepstrum != signal)
|
||||
|
||||
for (n = 0; n < OSCE_SPEC_WINDOW_SIZE; n++)
|
||||
{
|
||||
buffer[n] = osce_window[n] * signal[n];
|
||||
}
|
||||
|
||||
/* calculate magnitude spectrum */
|
||||
mag_spec_320_onesided(buffer, buffer);
|
||||
|
||||
/* accumulate bands */
|
||||
apply_filterbank(spec, buffer, center_bins_noisy, band_weights_noisy, OSCE_NOISY_SPEC_NUM_BANDS);
|
||||
|
||||
/* log domain conversion */
|
||||
for (n = 0; n < OSCE_NOISY_SPEC_NUM_BANDS; n++)
|
||||
{
|
||||
spec[n] = log(spec[n] + 1e-9f);
|
||||
#ifdef DEBUG_PRINT
|
||||
printf("logspec[%d]: %f\n", n, spec[n]);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* DCT-II (orthonormal) */
|
||||
celt_assert(OSCE_NOISY_SPEC_NUM_BANDS == NB_BANDS);
|
||||
dct(cepstrum, spec);
|
||||
}
|
||||
|
||||
/* Computes normalized cross-correlations between the current 80-sample
   subframe and lagged versions at lags lag-2 .. lag+2, writing 5 values to
   acorr. signal must provide at least lag+2 samples of history before
   signal[0]. acorr must not alias signal. */
static void calculate_acorr(float *acorr, float *signal, int lag)
{
    int n, k;
    float xx;

    celt_assert(acorr != signal); /* fix: statement was missing its terminating semicolon */

    /* energy of the current subframe is independent of k: compute it once
       (resolves the original "obviously wasteful -> fix later" note) */
    xx = 0;
    for (n = 0; n < 80; n++)
    {
        xx += signal[n] * signal[n];
    }

    for (k = -2; k <= 2; k++)
    {
        float xy = 0;
        float yy = 0;
        for (n = 0; n < 80; n++)
        {
            yy += signal[n - lag + k] * signal[n - lag + k];
            xy += signal[n] * signal[n - lag + k];
        }
        /* 1e-9f guards against division by zero for silent frames */
        acorr[k+2] = xy / sqrt(xx * yy + 1e-9f);
    }
}
|
||||
|
||||
/* Post-processes the decoder pitch lag for feature calculation: voiced
   frames pass their lag through; unvoiced frames get OSCE_NO_PITCH_VALUE,
   optionally with a short hangover that repeats the last voiced lag.
   Updates last_lag / last_type / pitch_hangover_count in psFeatures and
   returns the lag to use (never 0). */
static int pitch_postprocessing(OSCEFeatureState *psFeatures, int lag, int type)
{
    int new_lag;

#ifdef OSCE_HANGOVER_BUGFIX
#define TESTBIT 1
#else
#define TESTBIT 0
#endif

    /* hangover is currently disabled to reflect a bug in the python code. ToDo: re-evaluate hangover */
    if (type != TYPE_VOICED && psFeatures->last_type == TYPE_VOICED && TESTBIT)
    /* enter hangover */
    {
        new_lag = OSCE_NO_PITCH_VALUE;
        if (psFeatures->pitch_hangover_count < OSCE_PITCH_HANGOVER)
        {
            new_lag = psFeatures->last_lag;
            psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
        }
    }
    else if (type != TYPE_VOICED && psFeatures->pitch_hangover_count && TESTBIT)
    /* continue hangover */
    {
        new_lag = psFeatures->last_lag;
        psFeatures->pitch_hangover_count = (psFeatures->pitch_hangover_count + 1) % OSCE_PITCH_HANGOVER;
    }
    else if (type != TYPE_VOICED)
    /* unvoiced frame after hangover */
    {
        new_lag = OSCE_NO_PITCH_VALUE;
        psFeatures->pitch_hangover_count = 0;
    }
    else
    /* voiced frame: update last_lag */
    {
        new_lag = lag;
        psFeatures->last_lag = lag;
        psFeatures->pitch_hangover_count = 0;
    }
#undef TESTBIT /* fix: keep the helper macro local to this function */

    /* buffer update */
    psFeatures->last_type = type;

    /* with the current setup this should never happen (but who knows...) */
    celt_assert(new_lag); /* fix: statement was missing its terminating semicolon */

    return new_lag;
}
|
||||
|
||||
void osce_calculate_features(
|
||||
silk_decoder_state *psDec, /* I/O Decoder state */
|
||||
silk_decoder_control *psDecCtrl, /* I Decoder control */
|
||||
float *features, /* O input features */
|
||||
float *numbits, /* O numbits and smoothed numbits */
|
||||
int *periods, /* O pitch lags on subframe basis */
|
||||
const opus_int16 xq[], /* I Decoded speech */
|
||||
opus_int32 num_bits /* I Size of SILK payload in bits */
|
||||
)
|
||||
{
|
||||
int num_subframes, num_samples;
|
||||
float buffer[OSCE_FEATURES_MAX_HISTORY + OSCE_MAX_FEATURE_FRAMES * 80];
|
||||
float *frame, *pfeatures;
|
||||
OSCEFeatureState *psFeatures;
|
||||
int i, n, k;
|
||||
#ifdef WRITE_FEATURES
|
||||
static FILE *f_feat = NULL;
|
||||
if (f_feat == NULL)
|
||||
{
|
||||
f_feat = fopen("assembled_features.f32", "wb");
|
||||
}
|
||||
#endif
|
||||
|
||||
/*OPUS_CLEAR(buffer, 1);*/
|
||||
memset(buffer, 0, sizeof(buffer));
|
||||
|
||||
num_subframes = psDec->nb_subfr;
|
||||
num_samples = num_subframes * 80;
|
||||
psFeatures = &psDec->osce.features;
|
||||
|
||||
/* smooth bit count */
|
||||
psFeatures->numbits_smooth = 0.9f * psFeatures->numbits_smooth + 0.1f * num_bits;
|
||||
numbits[0] = num_bits;
|
||||
#ifdef OSCE_NUMBITS_BUGFIX
|
||||
numbits[1] = psFeatures->numbits_smooth;
|
||||
#else
|
||||
numbits[1] = num_bits;
|
||||
#endif
|
||||
|
||||
for (n = 0; n < num_samples; n++)
|
||||
{
|
||||
buffer[OSCE_FEATURES_MAX_HISTORY + n] = (float) xq[n] / (1U<<15);
|
||||
}
|
||||
OPUS_COPY(buffer, psFeatures->signal_history, OSCE_FEATURES_MAX_HISTORY);
|
||||
|
||||
for (k = 0; k < num_subframes; k++)
|
||||
{
|
||||
pfeatures = features + k * OSCE_FEATURE_DIM;
|
||||
frame = &buffer[OSCE_FEATURES_MAX_HISTORY + k * 80];
|
||||
memset(pfeatures, 0, OSCE_FEATURE_DIM); /* precaution */
|
||||
|
||||
/* clean spectrum from lpcs (update every other frame) */
|
||||
if (k % 2 == 0)
|
||||
{
|
||||
calculate_log_spectrum_from_lpc(pfeatures + OSCE_CLEAN_SPEC_START, psDecCtrl->PredCoef_Q12[k >> 1], psDec->LPC_order);
|
||||
}
|
||||
else
|
||||
{
|
||||
OPUS_COPY(pfeatures + OSCE_CLEAN_SPEC_START, pfeatures + OSCE_CLEAN_SPEC_START - OSCE_FEATURE_DIM, OSCE_CLEAN_SPEC_LENGTH);
|
||||
}
|
||||
|
||||
/* noisy cepstrum from signal (update every other frame) */
|
||||
if (k % 2 == 0)
|
||||
{
|
||||
calculate_cepstrum(pfeatures + OSCE_NOISY_CEPSTRUM_START, frame - 160);
|
||||
}
|
||||
else
|
||||
{
|
||||
OPUS_COPY(pfeatures + OSCE_NOISY_CEPSTRUM_START, pfeatures + OSCE_NOISY_CEPSTRUM_START - OSCE_FEATURE_DIM, OSCE_NOISY_CEPSTRUM_LENGTH);
|
||||
}
|
||||
|
||||
/* pitch hangover and zero value replacement */
|
||||
periods[k] = pitch_postprocessing(psFeatures, psDecCtrl->pitchL[k], psDec->indices.signalType);
|
||||
|
||||
/* auto-correlation around pitch lag */
|
||||
calculate_acorr(pfeatures + OSCE_ACORR_START, frame, periods[k]);
|
||||
|
||||
/* ltp */
|
||||
celt_assert(OSCE_LTP_LENGTH == LTP_ORDER)
|
||||
for (i = 0; i < OSCE_LTP_LENGTH; i++)
|
||||
{
|
||||
pfeatures[OSCE_LTP_START + i] = (float) psDecCtrl->LTPCoef_Q14[k * LTP_ORDER + i] / (1U << 14);
|
||||
}
|
||||
|
||||
/* frame gain */
|
||||
pfeatures[OSCE_LOG_GAIN_START] = log((float) psDecCtrl->Gains_Q16[k] / (1UL << 16) + 1e-9f);
|
||||
|
||||
#ifdef WRITE_FEATURES
|
||||
fwrite(pfeatures, sizeof(*pfeatures), 93, f_feat);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* buffer update */
|
||||
OPUS_COPY(psFeatures->signal_history, &buffer[num_samples], OSCE_FEATURES_MAX_HISTORY);
|
||||
}
|
||||
|
||||
|
||||
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length)
|
||||
{
|
||||
int i;
|
||||
celt_assert(length >= 160);
|
||||
|
||||
for (i = 0; i < 160; i++)
|
||||
{
|
||||
x_enhanced[i] = osce_window[i] * x_enhanced[i] + (1.f - osce_window[i]) * x_in[i];
|
||||
}
|
||||
|
||||
|
||||
}
|
50
dnn/osce_features.h
Normal file
50
dnn/osce_features.h
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OSCE_FEATURES_H
|
||||
#define OSCE_FEATURES_H
|
||||
|
||||
|
||||
#include "structs.h"
|
||||
#include "opus_types.h"
|
||||
|
||||
#define OSCE_NUMBITS_BUGFIX
|
||||
|
||||
void osce_calculate_features(
|
||||
silk_decoder_state *psDec, /* I/O Decoder state */
|
||||
silk_decoder_control *psDecCtrl, /* I Decoder control */
|
||||
float *features, /* O input features */
|
||||
float *numbits, /* O numbits and smoothed numbits */
|
||||
int *periods, /* O pitch lags on subframe basis */
|
||||
const opus_int16 xq[], /* I Decoded speech */
|
||||
opus_int32 num_bits /* I Size of SILK payload in bits */
|
||||
);
|
||||
|
||||
|
||||
void osce_cross_fade_10ms(float *x_enhanced, float *x_in, int length);
|
||||
|
||||
#endif
|
124
dnn/osce_structs.h
Normal file
124
dnn/osce_structs.h
Normal file
|
@ -0,0 +1,124 @@
|
|||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef OSCE_STRUCTS_H
|
||||
#define OSCE_STRUCTS_H
|
||||
|
||||
#include "opus_types.h"
|
||||
#include "osce_config.h"
|
||||
#ifndef DISABLE_LACE
|
||||
#include "lace_data.h"
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
#include "nolace_data.h"
|
||||
#endif
|
||||
#include "nndsp.h"
|
||||
#include "nnet.h"
|
||||
|
||||
/* feature calculation */
|
||||
|
||||
/* State for OSCE feature computation (see osce_calculate_features). */
typedef struct {
    float numbits_smooth;                           /* exponentially smoothed SILK payload bit count */
    int pitch_hangover_count;                       /* frames spent in pitch hangover */
    int last_lag;                                   /* last voiced pitch lag */
    int last_type;                                  /* signal type of the previous frame */
    float signal_history[OSCE_FEATURES_MAX_HISTORY];/* tail of the previously decoded signal */
    int reset;                                      /* non-zero requests a state reset -- TODO confirm against users */
} OSCEFeatureState;


#ifndef DISABLE_LACE
/* LACE */
/* Runtime state of the LACE enhancement model (filter/GRU memories). */
typedef struct {
    float feature_net_conv2_state[LACE_FNET_CONV2_STATE_SIZE];
    float feature_net_gru_state[LACE_COND_DIM];
    AdaCombState cf1_state;
    AdaCombState cf2_state;
    AdaConvState af1_state;
    float preemph_mem;  /* pre-emphasis filter memory */
    float deemph_mem;   /* de-emphasis filter memory */
} LACEState;

/* LACE model: weights plus the overlap-add window. */
typedef struct
{
    LACELayers layers;
    float window[LACE_OVERLAP_SIZE];
} LACE;

#endif /* #ifndef DISABLE_LACE */


#ifndef DISABLE_NOLACE
/* NoLACE */
/* Runtime state of the NoLACE enhancement model. */
typedef struct {
    float feature_net_conv2_state[NOLACE_FNET_CONV2_STATE_SIZE];
    float feature_net_gru_state[NOLACE_COND_DIM];
    float post_cf1_state[NOLACE_COND_DIM];
    float post_cf2_state[NOLACE_COND_DIM];
    float post_af1_state[NOLACE_COND_DIM];
    float post_af2_state[NOLACE_COND_DIM];
    float post_af3_state[NOLACE_COND_DIM];
    AdaCombState cf1_state;
    AdaCombState cf2_state;
    AdaConvState af1_state;
    AdaConvState af2_state;
    AdaConvState af3_state;
    AdaConvState af4_state;
    AdaShapeState tdshape1_state;
    AdaShapeState tdshape2_state;
    AdaShapeState tdshape3_state;
    float preemph_mem;  /* pre-emphasis filter memory */
    float deemph_mem;   /* de-emphasis filter memory */
} NoLACEState;

/* NoLACE model: weights plus the overlap-add window.
   NOTE(review): window is sized by LACE_OVERLAP_SIZE, not a NOLACE_ constant --
   presumably both models share the same overlap size; confirm. */
typedef struct {
    NOLACELayers layers;
    float window[LACE_OVERLAP_SIZE];
} NoLACE;

#endif /* #ifndef DISABLE_NOLACE */

/* OSCEModel: container for all compiled-in enhancement models. */
typedef struct {
#ifndef DISABLE_LACE
    LACE lace;
#endif
#ifndef DISABLE_NOLACE
    NoLACE nolace;
#endif
} OSCEModel;

/* Union: only one method's state is live at a time. */
typedef union {
#ifndef DISABLE_LACE
    LACEState lace;
#endif
#ifndef DISABLE_NOLACE
    NoLACEState nolace;
#endif
} OSCEState;
|
||||
|
||||
#endif
|
165
dnn/torch/osce/create_testvectors.py
Normal file
165
dnn/torch/osce/create_testvectors.py
Normal file
|
@ -0,0 +1,165 @@
|
|||
"""
|
||||
/* Copyright (c) 2023 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
"""
|
||||
|
||||
import os
|
||||
import argparse
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from models import model_dict
|
||||
from utils import endoscopy
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument('checkpoint_path', type=str, help='path to folder containing checkpoints "lace_checkpoint.pth" and nolace_checkpoint.pth"')
|
||||
parser.add_argument('output_folder', type=str, help='output folder for testvectors')
|
||||
parser.add_argument('--debug', action='store_true', help='add debug output to output folder')
|
||||
|
||||
|
||||
def create_adaconv_testvector(prefix, adaconv, num_frames, debug=False):
    """Run an adaptive-conv layer on random input and dump <prefix>_features.f32,
    <prefix>_x_in.f32 and <prefix>_x_out.f32 as float32 test vectors.

    adaconv is a LimitedAdaptiveConv1d-style module exposing feature_dim,
    in_channels, out_channels and frame_size.
    """
    feature_dim = adaconv.feature_dim
    in_channels = adaconv.in_channels
    out_channels = adaconv.out_channels
    frame_size = adaconv.frame_size

    features = torch.randn((1, num_frames, feature_dim))
    x_in = torch.randn((1, in_channels, num_frames * frame_size))

    x_out = adaconv(x_in, features, debug=debug)

    # reorder to (frame, channel, sample) so the C side reads frame-by-frame
    features = features[0].detach().numpy()
    x_in = x_in[0].reshape(in_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy()
    x_out = x_out[0].reshape(out_channels, num_frames, frame_size).permute(1, 0, 2).detach().numpy()

    features.tofile(prefix + '_features.f32')
    x_in.tofile(prefix + '_x_in.f32')
    x_out.tofile(prefix + '_x_out.f32')
|
||||
|
||||
def create_adacomb_testvector(prefix, adacomb, num_frames, debug=False):
    """Run an adaptive-comb layer on random input and dump float32/int32 test
    vectors: <prefix>_features.f32, <prefix>_x_in.f32, <prefix>_p_in.s32 and
    <prefix>_x_out.f32.

    adacomb is a LimitedAdaptiveComb1d-style module (single input channel)
    exposing feature_dim, frame_size and kernel_size.
    """
    feature_dim = adacomb.feature_dim
    in_channels = 1
    frame_size = adacomb.frame_size

    features = torch.randn((1, num_frames, feature_dim))
    x_in = torch.randn((1, in_channels, num_frames * frame_size))
    # random per-frame pitch lags in [kernel_size, 250)
    p_in = torch.randint(adacomb.kernel_size, 250, (1, num_frames))

    x_out = adacomb(x_in, features, p_in, debug=debug)

    features = features[0].detach().numpy()
    x_in = x_in[0].permute(1, 0).detach().numpy()
    p_in = p_in[0].detach().numpy().astype(np.int32)
    x_out = x_out[0].permute(1, 0).detach().numpy()

    features.tofile(prefix + '_features.f32')
    x_in.tofile(prefix + '_x_in.f32')
    p_in.tofile(prefix + '_p_in.s32')
    x_out.tofile(prefix + '_x_out.f32')
|
||||
|
||||
def create_adashape_testvector(prefix, adashape, num_frames):
    """Run a TDShaper-style module on random input and dump
    <prefix>_features.f32, <prefix>_x_in.f32 and <prefix>_x_out.f32.
    """
    feature_dim = adashape.feature_dim
    frame_size = adashape.frame_size

    features = torch.randn((1, num_frames, feature_dim))
    x_in = torch.randn((1, 1, num_frames * frame_size))

    x_out = adashape(x_in, features)

    features = features[0].detach().numpy()
    # single channel: flatten to a plain sample stream
    x_in = x_in.flatten().detach().numpy()
    x_out = x_out.flatten().detach().numpy()

    features.tofile(prefix + '_features.f32')
    x_in.tofile(prefix + '_x_in.f32')
    x_out.tofile(prefix + '_x_out.f32')
|
||||
|
||||
def create_feature_net_testvector(prefix, model, num_frames):
    """Run a LACE/NoLACE feature net on random inputs and dump test vectors:
    <prefix>_in_features.f32, <prefix>_periods.s32, <prefix>_numbits.f32,
    <prefix>_full_features.f32 and <prefix>_out_features.f32.

    Features and periods are per-subframe (4 subframes per frame); numbits is
    per-frame and repeated over the subframes.
    """
    num_features = model.num_features
    num_subframes = 4 * num_frames

    input_features = torch.randn((1, num_subframes, num_features))
    # random pitch lags in [32, 300)
    periods = torch.randint(32, 300, (1, num_subframes))
    # uniform samples over the model's valid numbits range
    numbits = model.numbits_range[0] + torch.rand((1, num_frames, 2)) * (model.numbits_range[1] - model.numbits_range[0])


    pembed = model.pitch_embedding(periods)
    # numbits embedding is per frame; repeat to per-subframe resolution
    nembed = torch.repeat_interleave(model.numbits_embedding(numbits).flatten(2), 4, dim=1)
    full_features = torch.cat((input_features, pembed, nembed), dim=-1)

    cf = model.feature_net(full_features)

    input_features.float().numpy().tofile(prefix + "_in_features.f32")
    periods.numpy().astype(np.int32).tofile(prefix + "_periods.s32")
    numbits.float().numpy().tofile(prefix + "_numbits.f32")
    full_features.detach().numpy().tofile(prefix + "_full_features.f32")
    cf.detach().numpy().tofile(prefix + "_out_features.f32")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    args = parser.parse_args()

    os.makedirs(args.output_folder, exist_ok=True)

    # load LACE and NoLACE checkpoints and rebuild the models from their stored kwargs
    lace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "lace_checkpoint.pth"), map_location='cpu')
    nolace_checkpoint = torch.load(os.path.join(args.checkpoint_path, "nolace_checkpoint.pth"), map_location='cpu')

    lace = model_dict['lace'](**lace_checkpoint['setup']['model']['kwargs'])
    nolace = model_dict['nolace'](**nolace_checkpoint['setup']['model']['kwargs'])

    lace.load_state_dict(lace_checkpoint['state_dict'])
    nolace.load_state_dict(nolace_checkpoint['state_dict'])

    # optional layer-introspection dumps alongside the test vectors
    if args.debug:
        endoscopy.init(args.output_folder)

    # lace af1, 1 input channel, 1 output channel
    create_adaconv_testvector(os.path.join(args.output_folder, "lace_af1"), lace.af1, 5, debug=args.debug)

    # nolace af1, 1 input channel, 2 output channels
    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af1"), nolace.af1, 5, debug=args.debug)

    # nolace af4, 2 input channel, 1 output channels
    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af4"), nolace.af4, 5, debug=args.debug)

    # nolace af2, 2 input channel, 2 output channels
    create_adaconv_testvector(os.path.join(args.output_folder, "nolace_af2"), nolace.af2, 5, debug=args.debug)

    # lace cf1
    create_adacomb_testvector(os.path.join(args.output_folder, "lace_cf1"), lace.cf1, 5, debug=args.debug)

    # nolace tdshape1
    create_adashape_testvector(os.path.join(args.output_folder, "nolace_tdshape1"), nolace.tdshape1, 5)

    # lace feature net
    create_feature_net_testvector(os.path.join(args.output_folder, 'lace'), lace, 5)

    if args.debug:
        endoscopy.close()
|
|
@ -49,7 +49,6 @@ class SilkEnhancementSet(Dataset):
|
|||
num_bands_noisy_spec=18,
|
||||
noisy_spec_scale='opus',
|
||||
noisy_apply_dct=True,
|
||||
add_offset=False,
|
||||
add_double_lag_acorr=False,
|
||||
):
|
||||
|
||||
|
@ -73,7 +72,6 @@ class SilkEnhancementSet(Dataset):
|
|||
self.gains = np.fromfile(os.path.join(path, 'features_gain.f32'), dtype=np.float32)
|
||||
self.num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32)
|
||||
self.num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32)
|
||||
self.offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)
|
||||
|
||||
self.clean_signal_hp = np.fromfile(os.path.join(path, 'clean_hp.s16'), dtype=np.int16)
|
||||
self.clean_signal = np.fromfile(os.path.join(path, 'clean.s16'), dtype=np.int16)
|
||||
|
@ -86,7 +84,6 @@ class SilkEnhancementSet(Dataset):
|
|||
num_bands_noisy_spec,
|
||||
noisy_spec_scale,
|
||||
noisy_apply_dct,
|
||||
add_offset,
|
||||
add_double_lag_acorr)
|
||||
|
||||
self.history_len = 700 if add_double_lag_acorr else 350
|
||||
|
@ -120,8 +117,7 @@ class SilkEnhancementSet(Dataset):
|
|||
self.lpcs[frame_start : frame_stop],
|
||||
self.gains[frame_start : frame_stop],
|
||||
self.ltps[frame_start : frame_stop],
|
||||
self.periods[frame_start : frame_stop],
|
||||
self.offsets[frame_start : frame_stop]
|
||||
self.periods[frame_start : frame_stop]
|
||||
)
|
||||
|
||||
if self.preemph > 0:
|
||||
|
|
|
@ -40,10 +40,53 @@ import wexchange.torch
|
|||
from wexchange.torch import dump_torch_weights
|
||||
from models import model_dict
|
||||
|
||||
from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
|
||||
from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
|
||||
from utils.layers.td_shaper import TDShaper
|
||||
from wexchange.torch import dump_torch_weights
|
||||
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument('checkpoint', type=str, help='LACE or NoLACE model checkpoint')
|
||||
parser.add_argument('output_dir', type=str, help='output folder')
|
||||
parser.add_argument('--quantize', action="store_true", help='quantization according to schedule')
|
||||
|
||||
|
||||
schedules = {
|
||||
'nolace': [
|
||||
('pitch_embedding', dict()),
|
||||
('feature_net.conv1', dict()),
|
||||
('feature_net.conv2', dict(quantize=True, scale=None)),
|
||||
('feature_net.tconv', dict(quantize=True, scale=None)),
|
||||
('feature_net.gru', dict()),
|
||||
('cf1', dict(quantize=True, scale=None)),
|
||||
('cf2', dict(quantize=True, scale=None)),
|
||||
('af1', dict(quantize=True, scale=None)),
|
||||
('tdshape1', dict()),
|
||||
('tdshape2', dict()),
|
||||
('tdshape3', dict()),
|
||||
('af2', dict(quantize=True, scale=None)),
|
||||
('af3', dict(quantize=True, scale=None)),
|
||||
('af4', dict(quantize=True, scale=None)),
|
||||
('post_cf1', dict(quantize=True, scale=None)),
|
||||
('post_cf2', dict(quantize=True, scale=None)),
|
||||
('post_af1', dict(quantize=True, scale=None)),
|
||||
('post_af2', dict(quantize=True, scale=None)),
|
||||
('post_af3', dict(quantize=True, scale=None))
|
||||
],
|
||||
'lace' : [
|
||||
('pitch_embedding', dict()),
|
||||
('feature_net.conv1', dict()),
|
||||
('feature_net.conv2', dict(quantize=True, scale=None)),
|
||||
('feature_net.tconv', dict(quantize=True, scale=None)),
|
||||
('feature_net.gru', dict()),
|
||||
('cf1', dict(quantize=True, scale=None)),
|
||||
('cf2', dict(quantize=True, scale=None)),
|
||||
('af1', dict(quantize=True, scale=None))
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
# auxiliary functions
|
||||
|
@ -60,8 +103,28 @@ def sha1(filename):
|
|||
|
||||
return sha1.hexdigest()
|
||||
|
||||
def osce_dump_generic(writer, name, module):
|
||||
if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \
|
||||
or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding) \
|
||||
or isinstance(module, LimitedAdaptiveConv1d) or isinstance(module, LimitedAdaptiveComb1d) \
|
||||
or isinstance(module, TDShaper) or isinstance(module, torch.nn.GRU):
|
||||
dump_torch_weights(writer, module, name=name, verbose=True)
|
||||
else:
|
||||
for child_name, child in module.named_children():
|
||||
osce_dump_generic(writer, (name + "_" + child_name).replace("feature_net", "fnet"), child)
|
||||
|
||||
|
||||
def export_name(name):
|
||||
return name.replace('.', '_')
|
||||
name = name.replace('.', '_')
|
||||
name = name.replace('feature_net', 'fnet')
|
||||
return name
|
||||
|
||||
def osce_scheduled_dump(writer, prefix, model, schedule):
|
||||
if not prefix.endswith('_'):
|
||||
prefix += '_'
|
||||
|
||||
for name, kwargs in schedule:
|
||||
dump_torch_weights(writer, model.get_submodule(name), prefix + export_name(name), **kwargs, verbose=True)
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
|
@ -76,22 +139,34 @@ if __name__ == "__main__":
|
|||
# create model and load weights
|
||||
checkpoint = torch.load(checkpoint_path, map_location='cpu')
|
||||
model = model_dict[checkpoint['setup']['model']['name']](*checkpoint['setup']['model']['args'], **checkpoint['setup']['model']['kwargs'])
|
||||
model.load_state_dict(checkpoint['state_dict'])
|
||||
|
||||
# CWriter
|
||||
model_name = checkpoint['setup']['model']['name']
|
||||
cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper())
|
||||
cwriter = wexchange.c_export.CWriter(os.path.join(outdir, model_name + "_data"), message=message, model_struct_name=model_name.upper() + 'Layers', add_typedef=True)
|
||||
|
||||
# dump numbits_embedding parameters by hand
|
||||
numbits_embedding = model.get_submodule('numbits_embedding')
|
||||
weights = next(iter(numbits_embedding.parameters()))
|
||||
for i, c in enumerate(weights):
|
||||
cwriter.header.write(f"\nNUMBITS_COEF_{i} {float(c.detach())}f")
|
||||
cwriter.header.write("\n\n")
|
||||
# Add custom includes and global parameters
|
||||
cwriter.header.write(f'''
|
||||
#define {model_name.upper()}_PREEMPH {model.preemph}f
|
||||
#define {model_name.upper()}_FRAME_SIZE {model.FRAME_SIZE}
|
||||
#define {model_name.upper()}_OVERLAP_SIZE 40
|
||||
#define {model_name.upper()}_NUM_FEATURES {model.num_features}
|
||||
#define {model_name.upper()}_PITCH_MAX {model.pitch_max}
|
||||
#define {model_name.upper()}_PITCH_EMBEDDING_DIM {model.pitch_embedding_dim}
|
||||
#define {model_name.upper()}_NUMBITS_RANGE_LOW {model.numbits_range[0]}
|
||||
#define {model_name.upper()}_NUMBITS_RANGE_HIGH {model.numbits_range[1]}
|
||||
#define {model_name.upper()}_NUMBITS_EMBEDDING_DIM {model.numbits_embedding_dim}
|
||||
#define {model_name.upper()}_COND_DIM {model.cond_dim}
|
||||
#define {model_name.upper()}_HIDDEN_FEATURE_DIM {model.hidden_feature_dim}
|
||||
''')
|
||||
|
||||
for i, s in enumerate(model.numbits_embedding.scale_factors):
|
||||
cwriter.header.write(f"#define {model_name.upper()}_NUMBITS_SCALE_{i} {float(s.detach().cpu())}f\n")
|
||||
|
||||
# dump layers
|
||||
for name, module in model.named_modules():
|
||||
if isinstance(module, torch.nn.Linear) or isinstance(module, torch.nn.Conv1d) \
|
||||
or isinstance(module, torch.nn.ConvTranspose1d) or isinstance(module, torch.nn.Embedding):
|
||||
dump_torch_weights(cwriter, module, name=export_name(name), verbose=True)
|
||||
if model_name in schedules and args.quantize:
|
||||
osce_scheduled_dump(cwriter, model_name, model, schedules[model_name])
|
||||
else:
|
||||
osce_dump_generic(cwriter, model_name, model)
|
||||
|
||||
cwriter.close()
|
||||
|
|
|
@ -96,7 +96,7 @@ class LACE(NNSBase):
|
|||
self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
|
||||
|
||||
# spectral shaping
|
||||
self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
|
||||
self.af1 = LimitedAdaptiveConv1d(1, 1, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
|
||||
|
||||
def flop_count(self, rate=16000, verbose=False):
|
||||
|
||||
|
|
|
@ -96,8 +96,8 @@ class NoLACE(NNSBase):
|
|||
# comb filters
|
||||
left_pad = self.kernel_size // 2
|
||||
right_pad = self.kernel_size - 1 - left_pad
|
||||
self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
|
||||
self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, use_bias=False, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
|
||||
self.cf1 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
|
||||
self.cf2 = LimitedAdaptiveComb1d(self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, overlap_size=40, padding=[left_pad, right_pad], max_lag=pitch_max + 1, gain_limit_db=comb_gain_limit_db, global_gain_limits_db=global_gain_limits_db, norm_p=norm_p)
|
||||
|
||||
# spectral shaping
|
||||
self.af1 = LimitedAdaptiveConv1d(1, 2, self.kernel_size, cond_dim, frame_size=self.FRAME_SIZE, use_bias=False, padding=[self.kernel_size - 1, 0], gain_limits_db=conv_gain_limits_db, norm_p=norm_p)
|
||||
|
|
|
@ -41,13 +41,13 @@ class LimitedAdaptiveComb1d(nn.Module):
|
|||
feature_dim,
|
||||
frame_size=160,
|
||||
overlap_size=40,
|
||||
use_bias=True,
|
||||
padding=None,
|
||||
max_lag=256,
|
||||
name=None,
|
||||
gain_limit_db=10,
|
||||
global_gain_limits_db=[-6, 6],
|
||||
norm_p=2):
|
||||
norm_p=2,
|
||||
**kwargs):
|
||||
"""
|
||||
|
||||
Parameters:
|
||||
|
@ -87,7 +87,6 @@ class LimitedAdaptiveComb1d(nn.Module):
|
|||
self.kernel_size = kernel_size
|
||||
self.frame_size = frame_size
|
||||
self.overlap_size = overlap_size
|
||||
self.use_bias = use_bias
|
||||
self.max_lag = max_lag
|
||||
self.limit_db = gain_limit_db
|
||||
self.norm_p = norm_p
|
||||
|
@ -101,8 +100,6 @@ class LimitedAdaptiveComb1d(nn.Module):
|
|||
# network for generating convolution weights
|
||||
self.conv_kernel = nn.Linear(feature_dim, kernel_size)
|
||||
|
||||
if self.use_bias:
|
||||
self.conv_bias = nn.Linear(feature_dim,1)
|
||||
|
||||
# comb filter gain
|
||||
self.filter_gain = nn.Linear(feature_dim, 1)
|
||||
|
@ -154,9 +151,6 @@ class LimitedAdaptiveComb1d(nn.Module):
|
|||
conv_kernels = self.conv_kernel(features).reshape((batch_size, num_frames, self.out_channels, self.in_channels, self.kernel_size))
|
||||
conv_kernels = conv_kernels / (1e-6 + torch.norm(conv_kernels, p=self.norm_p, dim=-1, keepdim=True))
|
||||
|
||||
if self.use_bias:
|
||||
conv_biases = self.conv_bias(features).permute(0, 2, 1)
|
||||
|
||||
conv_gains = torch.exp(- torch.relu(self.filter_gain(features).permute(0, 2, 1)) + self.log_gain_limit)
|
||||
# calculate gains
|
||||
global_conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.global_filter_gain(features).permute(0, 2, 1)) + self.filter_gain_b)
|
||||
|
@ -190,10 +184,6 @@ class LimitedAdaptiveComb1d(nn.Module):
|
|||
|
||||
new_chunk = torch.conv1d(xx, conv_kernels[:, i, ...].reshape((batch_size * self.out_channels, self.in_channels, self.kernel_size)), groups=batch_size).reshape(batch_size, self.out_channels, -1)
|
||||
|
||||
|
||||
if self.use_bias:
|
||||
new_chunk = new_chunk + conv_biases[:, :, i : i + 1]
|
||||
|
||||
offset = self.max_lag + self.padding[0]
|
||||
new_chunk = global_conv_gains[:, :, i : i + 1] * (new_chunk * conv_gains[:, :, i : i + 1] + x[..., offset + i * frame_size : offset + (i + 1) * frame_size + overlap_size])
|
||||
|
||||
|
@ -223,10 +213,6 @@ class LimitedAdaptiveComb1d(nn.Module):
|
|||
count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)
|
||||
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels
|
||||
|
||||
# bias computation
|
||||
if self.use_bias:
|
||||
count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead)
|
||||
|
||||
# a0 computation
|
||||
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels
|
||||
|
||||
|
|
|
@ -46,12 +46,12 @@ class LimitedAdaptiveConv1d(nn.Module):
|
|||
feature_dim,
|
||||
frame_size=160,
|
||||
overlap_size=40,
|
||||
use_bias=True,
|
||||
padding=None,
|
||||
name=None,
|
||||
gain_limits_db=[-6, 6],
|
||||
shape_gain_db=0,
|
||||
norm_p=2):
|
||||
norm_p=2,
|
||||
**kwargs):
|
||||
"""
|
||||
|
||||
Parameters:
|
||||
|
@ -90,7 +90,6 @@ class LimitedAdaptiveConv1d(nn.Module):
|
|||
self.kernel_size = kernel_size
|
||||
self.frame_size = frame_size
|
||||
self.overlap_size = overlap_size
|
||||
self.use_bias = use_bias
|
||||
self.gain_limits_db = gain_limits_db
|
||||
self.shape_gain_db = shape_gain_db
|
||||
self.norm_p = norm_p
|
||||
|
@ -104,9 +103,6 @@ class LimitedAdaptiveConv1d(nn.Module):
|
|||
# network for generating convolution weights
|
||||
self.conv_kernel = nn.Linear(feature_dim, in_channels * out_channels * kernel_size)
|
||||
|
||||
if self.use_bias:
|
||||
self.conv_bias = nn.Linear(feature_dim, out_channels)
|
||||
|
||||
self.shape_gain = min(1, 10**(shape_gain_db / 20))
|
||||
|
||||
self.filter_gain = nn.Linear(feature_dim, out_channels)
|
||||
|
@ -133,10 +129,6 @@ class LimitedAdaptiveConv1d(nn.Module):
|
|||
count += 2 * (frame_rate * self.feature_dim * self.kernel_size)
|
||||
count += 2 * (self.in_channels * self.out_channels * self.kernel_size * (1 + overhead) * rate)
|
||||
|
||||
# bias computation
|
||||
if self.use_bias:
|
||||
count += 2 * (frame_rate * self.feature_dim) + rate * (1 + overhead)
|
||||
|
||||
# gain computation
|
||||
|
||||
count += 2 * (frame_rate * self.feature_dim * self.out_channels) + rate * (1 + overhead) * self.out_channels
|
||||
|
@ -183,9 +175,6 @@ class LimitedAdaptiveConv1d(nn.Module):
|
|||
|
||||
conv_kernels = self.shape_gain * conv_kernels + (1 - self.shape_gain) * id_kernels
|
||||
|
||||
if self.use_bias:
|
||||
conv_biases = self.conv_bias(features).permute(0, 2, 1)
|
||||
|
||||
# calculate gains
|
||||
conv_gains = torch.exp(self.filter_gain_a * torch.tanh(self.filter_gain(features)) + self.filter_gain_b)
|
||||
if debug and batch_size == 1:
|
||||
|
|
|
@ -33,6 +33,7 @@ import numpy as np
|
|||
import torch
|
||||
|
||||
import scipy
|
||||
import scipy.signal
|
||||
|
||||
from utils.pitch import hangover, calculate_acorr_window
|
||||
from utils.spec import create_filter_bank, cepstrum, log_spectrum, log_spectrum_from_lpc
|
||||
|
@ -59,7 +60,6 @@ def silk_feature_factory(no_pitch_value=256,
|
|||
num_bands_noisy_spec=18,
|
||||
noisy_spec_scale='opus',
|
||||
noisy_apply_dct=True,
|
||||
add_offset=False,
|
||||
add_double_lag_acorr=False
|
||||
):
|
||||
|
||||
|
@ -67,7 +67,7 @@ def silk_feature_factory(no_pitch_value=256,
|
|||
fb_clean_spec = create_filter_bank(num_bands_clean_spec, 320, scale='erb', round_center_bins=True, normalize=True)
|
||||
fb_noisy_spec = create_filter_bank(num_bands_noisy_spec, 320, scale=noisy_spec_scale, round_center_bins=True, normalize=True)
|
||||
|
||||
def create_features(noisy, noisy_history, lpcs, gains, ltps, periods, offsets):
|
||||
def create_features(noisy, noisy_history, lpcs, gains, ltps, periods):
|
||||
|
||||
periods = periods.copy()
|
||||
|
||||
|
@ -89,10 +89,7 @@ def silk_feature_factory(no_pitch_value=256,
|
|||
|
||||
acorr, _ = calculate_acorr_window(noisy, 80, periods, noisy_history, radius=acorr_radius, add_double_lag_acorr=add_double_lag_acorr)
|
||||
|
||||
if add_offset:
|
||||
features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains, offsets.reshape(-1, 1)), axis=-1, dtype=np.float32)
|
||||
else:
|
||||
features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)
|
||||
features = np.concatenate((clean_spectrum, noisy_cepstrum, acorr, ltps, log_gains), axis=-1, dtype=np.float32)
|
||||
|
||||
return features, periods.astype(np.int64)
|
||||
|
||||
|
@ -110,7 +107,6 @@ def load_inference_data(path,
|
|||
num_bands_noisy_spec=18,
|
||||
noisy_spec_scale='opus',
|
||||
noisy_apply_dct=True,
|
||||
add_offset=False,
|
||||
add_double_lag_acorr=False,
|
||||
**kwargs):
|
||||
|
||||
|
@ -122,13 +118,12 @@ def load_inference_data(path,
|
|||
periods = np.fromfile(os.path.join(path, 'features_period.s16'), dtype=np.int16)
|
||||
num_bits = np.fromfile(os.path.join(path, 'features_num_bits.s32'), dtype=np.int32).astype(np.float32).reshape(-1, 1)
|
||||
num_bits_smooth = np.fromfile(os.path.join(path, 'features_num_bits_smooth.f32'), dtype=np.float32).reshape(-1, 1)
|
||||
offsets = np.fromfile(os.path.join(path, 'features_offset.f32'), dtype=np.float32)
|
||||
|
||||
# load signal, add back delay and pre-emphasize
|
||||
signal = np.fromfile(os.path.join(path, 'noisy.s16'), dtype=np.int16).astype(np.float32) / (2 ** 15)
|
||||
signal = np.concatenate((np.zeros(skip, dtype=np.float32), signal), dtype=np.float32)
|
||||
|
||||
create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_offset, add_double_lag_acorr)
|
||||
create_features = silk_feature_factory(no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec, num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_double_lag_acorr)
|
||||
|
||||
num_frames = min((len(signal) // 320) * 4, len(lpcs))
|
||||
signal = signal[: num_frames * 80]
|
||||
|
@ -138,11 +133,10 @@ def load_inference_data(path,
|
|||
periods = periods[: num_frames]
|
||||
num_bits = num_bits[: num_frames // 4]
|
||||
num_bits_smooth = num_bits[: num_frames // 4]
|
||||
offsets = offsets[: num_frames]
|
||||
|
||||
numbits = np.repeat(np.concatenate((num_bits, num_bits_smooth), axis=-1, dtype=np.float32), 4, axis=0)
|
||||
|
||||
features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods, offsets)
|
||||
features, periods = create_features(signal, np.zeros(350, dtype=signal.dtype), lpcs, gains, ltps, periods)
|
||||
|
||||
if preemph > 0:
|
||||
signal[1:] -= preemph * signal[:-1]
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
import math as m
|
||||
import numpy as np
|
||||
import scipy
|
||||
import scipy.fftpack
|
||||
import torch
|
||||
|
||||
def erb(f):
|
||||
|
|
|
@ -38,7 +38,8 @@ class CWriter:
|
|||
create_state_struct=False,
|
||||
enable_binary_blob=True,
|
||||
model_struct_name="Model",
|
||||
nnet_header="nnet.h"):
|
||||
nnet_header="nnet.h",
|
||||
add_typedef=False):
|
||||
"""
|
||||
Writer class for creating souce and header files for weight exports to C
|
||||
|
||||
|
@ -73,6 +74,7 @@ class CWriter:
|
|||
self.enable_binary_blob = enable_binary_blob
|
||||
self.create_state_struct = create_state_struct
|
||||
self.model_struct_name = model_struct_name
|
||||
self.add_typedef = add_typedef
|
||||
|
||||
# for binary blob format, format is key=<layer name>, value=(<layer type>, <init call>)
|
||||
self.layer_dict = OrderedDict()
|
||||
|
@ -119,11 +121,17 @@ f"""
|
|||
|
||||
# create model type
|
||||
if self.enable_binary_blob:
|
||||
self.header.write(f"\nstruct {self.model_struct_name} {{")
|
||||
if self.add_typedef:
|
||||
self.header.write(f"\ntypedef struct {{")
|
||||
else:
|
||||
self.header.write(f"\nstruct {self.model_struct_name} {{")
|
||||
for name, data in self.layer_dict.items():
|
||||
layer_type = data[0]
|
||||
self.header.write(f"\n {layer_type} {name};")
|
||||
self.header.write(f"\n}};\n")
|
||||
if self.add_typedef:
|
||||
self.header.write(f"\n}} {self.model_struct_name};\n")
|
||||
else:
|
||||
self.header.write(f"\n}};\n")
|
||||
|
||||
init_prototype = f"int init_{self.model_struct_name.lower()}({self.model_struct_name} *model, const WeightArray *arrays)"
|
||||
self.header.write(f"\n{init_prototype};\n")
|
||||
|
|
|
@ -34,3 +34,4 @@ from .torch import dump_torch_gru_weights, load_torch_gru_weights
|
|||
from .torch import dump_torch_grucell_weights
|
||||
from .torch import dump_torch_embedding_weights, load_torch_embedding_weights
|
||||
from .torch import dump_torch_weights, load_torch_weights
|
||||
from .torch import dump_torch_adaptive_conv1d_weights
|
|
@ -28,12 +28,154 @@
|
|||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
sys.path.append(sys.path.append(os.path.join(os.path.dirname(__file__), '../osce')))
|
||||
try:
|
||||
import utils.layers as osce_layers
|
||||
from utils.layers.limited_adaptive_conv1d import LimitedAdaptiveConv1d
|
||||
from utils.layers.limited_adaptive_comb1d import LimitedAdaptiveComb1d
|
||||
from utils.layers.td_shaper import TDShaper
|
||||
has_osce=True
|
||||
except:
|
||||
has_osce=False
|
||||
|
||||
from wexchange.c_export import CWriter, print_gru_layer, print_dense_layer, print_conv1d_layer, print_tconv1d_layer, print_conv2d_layer
|
||||
|
||||
def dump_torch_adaptive_conv1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):
|
||||
|
||||
|
||||
w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
|
||||
b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
|
||||
w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
|
||||
b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()
|
||||
|
||||
if isinstance(where, CWriter):
|
||||
# pad kernel for quantization
|
||||
left_padding = adaconv.padding[0]
|
||||
kernel_size = adaconv.kernel_size
|
||||
in_channels = adaconv.in_channels
|
||||
out_channels = adaconv.out_channels
|
||||
feature_dim = adaconv.feature_dim
|
||||
|
||||
if quantize and kernel_size % 8:
|
||||
kernel_padding = 8 - (kernel_size % 8)
|
||||
w_kernel = np.concatenate(
|
||||
(np.zeros((out_channels, in_channels, kernel_padding, feature_dim)), w_kernel.reshape(out_channels, in_channels, kernel_size, feature_dim)),
|
||||
dtype=w_kernel.dtype,
|
||||
axis=2).reshape(-1, feature_dim)
|
||||
b_kernel = np.concatenate(
|
||||
(np.zeros((out_channels, in_channels, kernel_padding)), b_kernel.reshape(out_channels, in_channels, kernel_size)),
|
||||
dtype=b_kernel.dtype,
|
||||
axis=2).reshape(-1)
|
||||
left_padding += kernel_padding
|
||||
kernel_size += kernel_padding
|
||||
|
||||
# write relevant scalar parameters to header file
|
||||
where.header.write(f"""
|
||||
#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
|
||||
#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
|
||||
#define {name.upper()}_SHAPE_GAIN {adaconv.shape_gain:f}f
|
||||
#define {name.upper()}_KERNEL_SIZE {kernel_size}
|
||||
#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
|
||||
#define {name.upper()}_LEFT_PADDING {left_padding}
|
||||
#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
|
||||
#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
|
||||
#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
|
||||
#define {name.upper()}_NORM_P {adaconv.norm_p}
|
||||
#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
|
||||
"""
|
||||
)
|
||||
|
||||
print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
|
||||
print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
|
||||
|
||||
|
||||
else:
|
||||
np.save(where, 'weight_kernel.npy', w_kernel)
|
||||
np.save(where, 'bias_kernel.npy', b_kernel)
|
||||
np.save(where, 'weight_gain.npy', w_gain)
|
||||
np.save(where, 'bias_gain.npy', b_gain)
|
||||
|
||||
|
||||
def dump_torch_adaptive_comb1d_weights(where, adaconv, name='adaconv', scale=1/128, quantize=False):
|
||||
|
||||
|
||||
w_kernel = adaconv.conv_kernel.weight.detach().cpu().numpy().copy()
|
||||
b_kernel = adaconv.conv_kernel.bias.detach().cpu().numpy().copy()
|
||||
w_gain = adaconv.filter_gain.weight.detach().cpu().numpy().copy()
|
||||
b_gain = adaconv.filter_gain.bias.detach().cpu().numpy().copy()
|
||||
w_global_gain = adaconv.global_filter_gain.weight.detach().cpu().numpy().copy()
|
||||
b_global_gain = adaconv.global_filter_gain.bias.detach().cpu().numpy().copy()
|
||||
|
||||
|
||||
if isinstance(where, CWriter):
|
||||
# pad kernel for quantization
|
||||
left_padding = adaconv.padding[0]
|
||||
kernel_size = adaconv.kernel_size
|
||||
|
||||
if quantize and w_kernel.shape[0] % 8:
|
||||
kernel_padding = 8 - (w_kernel.shape[0] % 8)
|
||||
w_kernel = np.concatenate((np.zeros((kernel_padding, w_kernel.shape[1])), w_kernel), dtype=w_kernel.dtype)
|
||||
b_kernel = np.concatenate((np.zeros((kernel_padding)), b_kernel), dtype=b_kernel.dtype)
|
||||
left_padding += kernel_padding
|
||||
kernel_size += kernel_padding
|
||||
# write relevant scalar parameters to header file
|
||||
where.header.write(f"""
|
||||
#define {name.upper()}_FILTER_GAIN_A {adaconv.filter_gain_a:f}f
|
||||
#define {name.upper()}_FILTER_GAIN_B {adaconv.filter_gain_b:f}f
|
||||
#define {name.upper()}_LOG_GAIN_LIMIT {adaconv.log_gain_limit:f}f
|
||||
#define {name.upper()}_KERNEL_SIZE {kernel_size}
|
||||
#define {name.upper()}_LEFT_PADDING {left_padding}
|
||||
#define {name.upper()}_FRAME_SIZE {adaconv.frame_size}
|
||||
#define {name.upper()}_OVERLAP_SIZE {adaconv.overlap_size}
|
||||
#define {name.upper()}_IN_CHANNELS {adaconv.in_channels}
|
||||
#define {name.upper()}_OUT_CHANNELS {adaconv.out_channels}
|
||||
#define {name.upper()}_NORM_P {adaconv.norm_p}
|
||||
#define {name.upper()}_FEATURE_DIM {adaconv.feature_dim}
|
||||
#define {name.upper()}_MAX_LAG {adaconv.max_lag}
|
||||
"""
|
||||
)
|
||||
|
||||
print_dense_layer(where, name + "_kernel", w_kernel, b_kernel, scale=scale, format='torch', sparse=False, diagonal=False, quantize=quantize)
|
||||
print_dense_layer(where, name + "_gain", w_gain, b_gain, format='torch', sparse=False, diagonal=False, quantize=False)
|
||||
print_dense_layer(where, name + "_global_gain", w_global_gain, b_global_gain, format='torch', sparse=False, diagonal=False, quantize=False)
|
||||
|
||||
|
||||
else:
|
||||
np.save(where, 'weight_kernel.npy', w_kernel)
|
||||
np.save(where, 'bias_kernel.npy', b_kernel)
|
||||
np.save(where, 'weight_gain.npy', w_gain)
|
||||
np.save(where, 'bias_gain.npy', b_gain)
|
||||
np.save(where, 'weight_global_gain.npy', w_global_gain)
|
||||
np.save(where, 'bias_global_gain.npy', b_global_gain)
|
||||
|
||||
def dump_torch_tdshaper(where, shaper, name='tdshaper'):
|
||||
|
||||
if isinstance(where, CWriter):
|
||||
where.header.write(f"""
|
||||
#define {name.upper()}_FEATURE_DIM {shaper.feature_dim}
|
||||
#define {name.upper()}_FRAME_SIZE {shaper.frame_size}
|
||||
#define {name.upper()}_AVG_POOL_K {shaper.avg_pool_k}
|
||||
#define {name.upper()}_INNOVATE {1 if shaper.innovate else 0}
|
||||
#define {name.upper()}_POOL_AFTER {1 if shaper.pool_after else 0}
|
||||
"""
|
||||
)
|
||||
|
||||
dump_torch_conv1d_weights(where, shaper.feature_alpha1, name + "_alpha1")
|
||||
dump_torch_conv1d_weights(where, shaper.feature_alpha2, name + "_alpha2")
|
||||
|
||||
if shaper.innovate:
|
||||
dump_torch_conv1d_weights(where, shaper.feature_alpha1b, name + "_alpha1b")
|
||||
dump_torch_conv1d_weights(where, shaper.feature_alpha1c, name + "_alpha1c")
|
||||
dump_torch_conv1d_weights(where, shaper.feature_alpha2b, name + "_alpha2b")
|
||||
dump_torch_conv1d_weights(where, shaper.feature_alpha2c, name + "_alpha2c")
|
||||
|
||||
|
||||
|
||||
def dump_torch_gru_weights(where, gru, name='gru', input_sparse=False, recurrent_sparse=False, quantize=False, scale=1/128, recurrent_scale=1/128):
|
||||
|
||||
assert gru.num_layers == 1
|
||||
|
@ -221,7 +363,6 @@ def load_torch_conv2d_weights(where, conv):
|
|||
|
||||
def dump_torch_embedding_weights(where, embed, name='embed', scale=1/128, sparse=False, diagonal=False, quantize=False):
|
||||
|
||||
print("quantize = ", quantize)
|
||||
w = embed.weight.detach().cpu().numpy().copy().transpose()
|
||||
b = np.zeros(w.shape[0], dtype=w.dtype)
|
||||
|
||||
|
@ -257,11 +398,21 @@ def dump_torch_weights(where, module, name=None, verbose=False, **kwargs):
|
|||
elif isinstance(module, torch.nn.Conv2d):
|
||||
return dump_torch_conv2d_weights(where, module, name, **kwargs)
|
||||
elif isinstance(module, torch.nn.Embedding):
|
||||
return dump_torch_embedding_weights(where, module)
|
||||
return dump_torch_embedding_weights(where, module, name, **kwargs)
|
||||
elif isinstance(module, torch.nn.ConvTranspose1d):
|
||||
return dump_torch_tconv1d_weights(where, module, name, **kwargs)
|
||||
else:
|
||||
raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
|
||||
if has_osce:
|
||||
if isinstance(module, LimitedAdaptiveConv1d):
|
||||
dump_torch_adaptive_conv1d_weights(where, module, name, **kwargs)
|
||||
elif isinstance(module, LimitedAdaptiveComb1d):
|
||||
dump_torch_adaptive_comb1d_weights(where, module, name, **kwargs)
|
||||
elif isinstance(module, TDShaper):
|
||||
dump_torch_tdshaper(where, module, name, **kwargs)
|
||||
else:
|
||||
raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
|
||||
else:
|
||||
raise ValueError(f'dump_torch_weights: layer of type {type(module)} not supported')
|
||||
|
||||
def load_torch_weights(where, module):
|
||||
""" generic function for loading weights of some torch.nn.Module """
|
||||
|
|
|
@ -46,6 +46,10 @@
|
|||
#include "plc_data.c"
|
||||
#include "dred_rdovae_enc_data.c"
|
||||
#include "dred_rdovae_dec_data.c"
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "lace_data.c"
|
||||
#include "nolace_data.c"
|
||||
#endif
|
||||
|
||||
void write_weights(const WeightArray *list, FILE *fout)
|
||||
{
|
||||
|
@ -53,6 +57,9 @@ void write_weights(const WeightArray *list, FILE *fout)
|
|||
unsigned char zeros[WEIGHT_BLOCK_SIZE] = {0};
|
||||
while (list[i].name != NULL) {
|
||||
WeightHead h;
|
||||
if (strlen(list[i].name) >= sizeof(h.name) - 1) {
|
||||
printf("[write_weights] warning: name %s too long\n", list[i].name);
|
||||
}
|
||||
memcpy(h.head, "DNNw", 4);
|
||||
h.version = WEIGHT_BLOB_VERSION;
|
||||
h.type = list[i].type;
|
||||
|
@ -77,6 +84,14 @@ int main(void)
|
|||
write_weights(lpcnet_plc_arrays, fout);
|
||||
write_weights(rdovaeenc_arrays, fout);
|
||||
write_weights(rdovaedec_arrays, fout);
|
||||
#ifdef ENABLE_OSCE
|
||||
#ifndef DISABLE_LACE
|
||||
write_weights(lacelayers_arrays, fout);
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
write_weights(nolacelayers_arrays, fout);
|
||||
#endif
|
||||
#endif
|
||||
fclose(fout);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -29,3 +29,12 @@ dnn/dred_rdovae_enc_data.h \
|
|||
dnn/dred_rdovae_dec.h \
|
||||
dnn/dred_rdovae_dec_data.h \
|
||||
dnn/dred_rdovae_stats_data.h
|
||||
|
||||
OSCE_HEAD= \
|
||||
dnn/osce.h \
|
||||
dnn/osce_config.h \
|
||||
dnn/osce_structs.h \
|
||||
dnn/osce_features.h \
|
||||
dnn/nndsp.h \
|
||||
dnn/lace_data.h \
|
||||
dnn/nolace_data.h
|
||||
|
|
|
@ -23,6 +23,13 @@ silk/dred_encoder.c \
|
|||
silk/dred_coding.c \
|
||||
silk/dred_decoder.c
|
||||
|
||||
OSCE_SOURCES = \
|
||||
dnn/osce.c \
|
||||
dnn/osce_features.c \
|
||||
dnn/nndsp.c \
|
||||
dnn/lace_data.c \
|
||||
dnn/nolace_data.c
|
||||
|
||||
DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c
|
||||
DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
|
||||
DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c
|
||||
|
|
|
@ -148,6 +148,7 @@ opts = [
|
|||
[ 'float-approx', 'FLOAT_APPROX' ],
|
||||
[ 'enable-deep-plc', 'ENABLE_DEEP_PLC' ],
|
||||
[ 'enable-dred', 'ENABLE_DRED' ],
|
||||
[ 'enable-osce', 'ENABLE_OSCE' ],
|
||||
[ 'assertions', 'ENABLE_ASSERTIONS' ],
|
||||
[ 'hardening', 'ENABLE_HARDENING' ],
|
||||
[ 'fuzzing', 'FUZZING' ],
|
||||
|
|
|
@ -9,6 +9,7 @@ option('intrinsics', type : 'feature', value : 'auto', description : 'Intrinsics
|
|||
|
||||
option('enable-deep-plc', type : 'boolean', value : false, description : 'Enable Deep Packet Loss Concealment (PLC)')
|
||||
option('enable-dred', type : 'boolean', value : false, description : 'Enable Deep Redundancy (DRED)')
|
||||
option('enable-osce', type : 'boolean', value : false, description : 'Enable Opus Speech Coding Enhancement (OSCE)')
|
||||
option('enable-dnn-debug-float', type : 'boolean', value : false, description : 'Compute DNN using float weights')
|
||||
|
||||
option('custom-modes', type : 'boolean', value : false, description : 'Enable non-Opus modes, e.g. 44.1 kHz & 2^n frames')
|
||||
|
|
16
silk/API.h
16
silk/API.h
|
@ -92,6 +92,16 @@ opus_int silk_Encode( /* O Returns error co
|
|||
/* Decoder functions */
|
||||
/****************************************/
|
||||
|
||||
|
||||
/***********************************************/
|
||||
/* Load OSCE models from external data pointer */
|
||||
/***********************************************/
|
||||
opus_int silk_LoadOSCEModels(
|
||||
void *decState, /* O I/O State */
|
||||
const unsigned char *data, /* I pointer to binary blob */
|
||||
int len /* I length of binary blob data */
|
||||
);
|
||||
|
||||
/***********************************************/
|
||||
/* Get size in bytes of the Silk decoder state */
|
||||
/***********************************************/
|
||||
|
@ -100,8 +110,12 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co
|
|||
);
|
||||
|
||||
/*************************/
|
||||
/* Init or Reset decoder */
|
||||
/* Init and Reset decoder */
|
||||
/*************************/
|
||||
opus_int silk_ResetDecoder( /* O Returns error code */
|
||||
void *decState /* I/O State */
|
||||
);
|
||||
|
||||
opus_int silk_InitDecoder( /* O Returns error code */
|
||||
void *decState /* I/O State */
|
||||
);
|
||||
|
|
|
@ -147,6 +147,11 @@ typedef struct {
|
|||
|
||||
/* I: Enable Deep PLC */
|
||||
opus_int enable_deep_plc;
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
/* I: OSCE method */
|
||||
opus_int osce_method;
|
||||
#endif
|
||||
} silk_DecControlStruct;
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -33,6 +33,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "stack_alloc.h"
|
||||
#include "os_support.h"
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "osce.h"
|
||||
#include "osce_structs.h"
|
||||
#endif
|
||||
|
||||
/************************/
|
||||
/* Decoder Super Struct */
|
||||
/************************/
|
||||
|
@ -42,12 +47,33 @@ typedef struct {
|
|||
opus_int nChannelsAPI;
|
||||
opus_int nChannelsInternal;
|
||||
opus_int prev_decode_only_middle;
|
||||
#ifdef ENABLE_OSCE
|
||||
OSCEModel osce_model;
|
||||
#endif
|
||||
} silk_decoder;
|
||||
|
||||
/*********************/
|
||||
/* Decoder functions */
|
||||
/*********************/
|
||||
|
||||
|
||||
|
||||
opus_int silk_LoadOSCEModels(void *decState, const unsigned char *data, int len)
|
||||
{
|
||||
#ifdef ENABLE_OSCE
|
||||
opus_int ret = SILK_NO_ERROR;
|
||||
|
||||
ret = osce_load_models(&((silk_decoder *)decState)->osce_model, data, len);
|
||||
|
||||
return ret;
|
||||
#else
|
||||
(void) decState;
|
||||
(void) data;
|
||||
(void) len;
|
||||
return SILK_NO_ERROR;
|
||||
#endif
|
||||
}
|
||||
|
||||
opus_int silk_Get_Decoder_Size( /* O Returns error code */
|
||||
opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
|
||||
)
|
||||
|
@ -60,6 +86,24 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co
|
|||
}
|
||||
|
||||
/* Reset decoder state */
|
||||
opus_int silk_ResetDecoder( /* O Returns error code */
|
||||
void *decState /* I/O State */
|
||||
)
|
||||
{
|
||||
opus_int n, ret = SILK_NO_ERROR;
|
||||
silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
|
||||
|
||||
for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
|
||||
ret = silk_reset_decoder( &channel_state[ n ] );
|
||||
}
|
||||
silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
|
||||
/* Not strictly needed, but it's cleaner that way */
|
||||
((silk_decoder *)decState)->prev_decode_only_middle = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
opus_int silk_InitDecoder( /* O Returns error code */
|
||||
void *decState /* I/O State */
|
||||
)
|
||||
|
@ -67,6 +111,11 @@ opus_int silk_InitDecoder( /* O Returns error co
|
|||
opus_int n, ret = SILK_NO_ERROR;
|
||||
silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
|
||||
|
||||
#ifndef USE_WEIGHTS_FILE
|
||||
/* load osce models */
|
||||
silk_LoadOSCEModels(decState, NULL, 0);
|
||||
#endif
|
||||
|
||||
for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
|
||||
ret = silk_init_decoder( &channel_state[ n ] );
|
||||
}
|
||||
|
@ -301,9 +350,17 @@ opus_int silk_Decode( /* O Returns error co
|
|||
} else {
|
||||
condCoding = CODE_CONDITIONALLY;
|
||||
}
|
||||
#ifdef ENABLE_OSCE
|
||||
if ( channel_state[n].osce.method != decControl->osce_method ) {
|
||||
osce_reset( &channel_state[n].osce, decControl->osce_method );
|
||||
}
|
||||
#endif
|
||||
ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding,
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
n == 0 ? lpcnet : NULL,
|
||||
#endif
|
||||
#ifdef ENABLE_OSCE
|
||||
&psDec->osce_model,
|
||||
#endif
|
||||
arch);
|
||||
} else {
|
||||
|
|
|
@ -33,6 +33,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "stack_alloc.h"
|
||||
#include "PLC.h"
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "osce.h"
|
||||
#endif
|
||||
|
||||
/****************/
|
||||
/* Decode frame */
|
||||
/****************/
|
||||
|
@ -45,17 +49,26 @@ opus_int silk_decode_frame(
|
|||
opus_int condCoding, /* I The type of conditional coding to use */
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
LPCNetPLCState *lpcnet,
|
||||
#endif
|
||||
#ifdef ENABLE_OSCE
|
||||
OSCEModel *osce_model,
|
||||
#endif
|
||||
int arch /* I Run-time architecture */
|
||||
)
|
||||
{
|
||||
VARDECL( silk_decoder_control, psDecCtrl );
|
||||
opus_int L, mv_len, ret = 0;
|
||||
#ifdef ENABLE_OSCE
|
||||
opus_int32 ec_start;
|
||||
#endif
|
||||
SAVE_STACK;
|
||||
|
||||
L = psDec->frame_length;
|
||||
ALLOC( psDecCtrl, 1, silk_decoder_control );
|
||||
psDecCtrl->LTP_scale_Q14 = 0;
|
||||
#ifdef ENABLE_OSCE
|
||||
ec_start = ec_tell(psRangeDec);
|
||||
#endif
|
||||
|
||||
/* Safety checks */
|
||||
celt_assert( L > 0 && L <= MAX_FRAME_LENGTH );
|
||||
|
@ -87,6 +100,21 @@ opus_int silk_decode_frame(
|
|||
/********************************************************/
|
||||
silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch );
|
||||
|
||||
/*************************/
|
||||
/* Update output buffer. */
|
||||
/*************************/
|
||||
celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
|
||||
mv_len = psDec->ltp_mem_length - psDec->frame_length;
|
||||
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
|
||||
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
/********************************************************/
|
||||
/* Run SILK enhancer */
|
||||
/********************************************************/
|
||||
osce_enhance_frame( osce_model, psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch );
|
||||
#endif
|
||||
|
||||
/********************************************************/
|
||||
/* Update PLC state */
|
||||
/********************************************************/
|
||||
|
@ -109,15 +137,18 @@ opus_int silk_decode_frame(
|
|||
lpcnet,
|
||||
#endif
|
||||
arch );
|
||||
}
|
||||
|
||||
/*************************/
|
||||
/* Update output buffer. */
|
||||
/*************************/
|
||||
celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
|
||||
mv_len = psDec->ltp_mem_length - psDec->frame_length;
|
||||
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
|
||||
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
|
||||
#ifdef ENABLE_OSCE
|
||||
osce_reset( &psDec->osce, psDec->osce.method );
|
||||
#endif
|
||||
/*************************/
|
||||
/* Update output buffer. */
|
||||
/*************************/
|
||||
celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
|
||||
mv_len = psDec->ltp_mem_length - psDec->frame_length;
|
||||
silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
|
||||
silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
|
||||
}
|
||||
|
||||
/************************************************/
|
||||
/* Comfort noise generation / estimation */
|
||||
|
|
|
@ -31,15 +31,21 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "main.h"
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "osce.h"
|
||||
#endif
|
||||
|
||||
#include "structs.h"
|
||||
|
||||
/************************/
|
||||
/* Init Decoder State */
|
||||
/* Reset Decoder State */
|
||||
/************************/
|
||||
opus_int silk_init_decoder(
|
||||
opus_int silk_reset_decoder(
|
||||
silk_decoder_state *psDec /* I/O Decoder state pointer */
|
||||
)
|
||||
{
|
||||
/* Clear the entire encoder state, except anything copied */
|
||||
silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
|
||||
silk_memset( &psDec->SILK_DECODER_STATE_RESET_START, 0, sizeof( silk_decoder_state ) - ((char*) &psDec->SILK_DECODER_STATE_RESET_START - (char*)psDec) );
|
||||
|
||||
/* Used to deactivate LSF interpolation */
|
||||
psDec->first_frame_after_reset = 1;
|
||||
|
@ -52,6 +58,27 @@ opus_int silk_init_decoder(
|
|||
/* Reset PLC state */
|
||||
silk_PLC_Reset( psDec );
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
/* Reset OSCE state and method */
|
||||
osce_reset(&psDec->osce, OSCE_DEFAULT_METHOD);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/************************/
|
||||
/* Init Decoder State */
|
||||
/************************/
|
||||
opus_int silk_init_decoder(
|
||||
silk_decoder_state *psDec /* I/O Decoder state pointer */
|
||||
)
|
||||
{
|
||||
/* Clear the entire encoder state, except anything copied */
|
||||
silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
|
||||
|
||||
silk_reset_decoder( psDec );
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
|
|
@ -389,6 +389,10 @@ void silk_NLSF_decode(
|
|||
/****************************************************/
|
||||
/* Decoder Functions */
|
||||
/****************************************************/
|
||||
opus_int silk_reset_decoder(
|
||||
silk_decoder_state *psDec /* I/O Decoder state pointer */
|
||||
);
|
||||
|
||||
opus_int silk_init_decoder(
|
||||
silk_decoder_state *psDec /* I/O Decoder state pointer */
|
||||
);
|
||||
|
@ -412,6 +416,9 @@ opus_int silk_decode_frame(
|
|||
opus_int condCoding, /* I The type of conditional coding to use */
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
LPCNetPLCState *lpcnet,
|
||||
#endif
|
||||
#ifdef ENABLE_OSCE
|
||||
OSCEModel *osce_model,
|
||||
#endif
|
||||
int arch /* I Run-time architecture */
|
||||
);
|
||||
|
|
|
@ -44,6 +44,11 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "dred_decoder.h"
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "osce_config.h"
|
||||
#include "osce_structs.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
|
@ -238,6 +243,14 @@ typedef struct {
|
|||
} silk_encoder_state;
|
||||
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
typedef struct {
|
||||
OSCEFeatureState features;
|
||||
OSCEState state;
|
||||
int method;
|
||||
} silk_OSCE_struct;
|
||||
#endif
|
||||
|
||||
/* Struct for Packet Loss Concealment */
|
||||
typedef struct {
|
||||
opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */
|
||||
|
@ -270,6 +283,10 @@ typedef struct {
|
|||
/* Decoder state */
|
||||
/********************************/
|
||||
typedef struct {
|
||||
#ifdef ENABLE_OSCE
|
||||
silk_OSCE_struct osce;
|
||||
#endif
|
||||
#define SILK_DECODER_STATE_RESET_START prev_gain_Q16
|
||||
opus_int32 prev_gain_Q16;
|
||||
opus_int32 exc_Q14[ MAX_FRAME_LENGTH ];
|
||||
opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ];
|
||||
|
|
|
@ -161,4 +161,4 @@ silk/float/schur_FLP.c \
|
|||
silk/float/sort_FLP.c
|
||||
|
||||
SILK_SOURCES_FLOAT_AVX2 = \
|
||||
silk/float/x86/inner_product_FLP_avx2.c
|
||||
silk/float/x86/inner_product_FLP_avx2.c
|
|
@ -57,6 +57,10 @@
|
|||
#include "dred_rdovae_dec.h"
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_OSCE
|
||||
#include "osce.h"
|
||||
#endif
|
||||
|
||||
struct OpusDecoder {
|
||||
int celt_dec_offset;
|
||||
int silk_dec_offset;
|
||||
|
@ -383,7 +387,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
|
|||
pcm_ptr = pcm_silk;
|
||||
|
||||
if (st->prev_mode==MODE_CELT_ONLY)
|
||||
silk_InitDecoder( silk_dec );
|
||||
silk_ResetDecoder( silk_dec );
|
||||
|
||||
/* The SILK PLC cannot produce frames of less than 10 ms */
|
||||
st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);
|
||||
|
@ -408,6 +412,15 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
|
|||
}
|
||||
}
|
||||
st->DecControl.enable_deep_plc = st->complexity >= 5;
|
||||
#ifdef ENABLE_OSCE
|
||||
st->DecControl.osce_method = OSCE_METHOD_NONE;
|
||||
#ifndef DISABLE_LACE
|
||||
if (st->complexity >= 6) {st->DecControl.osce_method = OSCE_METHOD_LACE;}
|
||||
#endif
|
||||
#ifndef DISABLE_NOLACE
|
||||
if (st->complexity >= 7) {st->DecControl.osce_method = OSCE_METHOD_NOLACE;}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
lost_flag = data == NULL ? 1 : 2 * !!decode_fec;
|
||||
decoded_samples = 0;
|
||||
|
@ -953,7 +966,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
|
|||
((char*)&st->OPUS_DECODER_RESET_START - (char*)st));
|
||||
|
||||
celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
|
||||
silk_InitDecoder( silk_dec );
|
||||
silk_ResetDecoder( silk_dec );
|
||||
st->stream_channels = st->channels;
|
||||
st->frame_size = st->Fs/400;
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
|
@ -1044,6 +1057,7 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
|
|||
goto bad_arg;
|
||||
}
|
||||
ret = lpcnet_plc_load_model(&st->lpcnet, data, len);
|
||||
ret = silk_LoadOSCEModels(silk_dec, data, len) || ret;
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
|
|
@ -70,6 +70,10 @@ unsigned char *load_blob(const char *filename, int *len) {
|
|||
FILE *file;
|
||||
unsigned char *data;
|
||||
file = fopen(filename, "r");
|
||||
if (file == NULL)
|
||||
{
|
||||
perror("could not open blob file\n");
|
||||
}
|
||||
fseek(file, 0L, SEEK_END);
|
||||
*len = ftell(file);
|
||||
fseek(file, 0L, SEEK_SET);
|
||||
|
@ -254,6 +258,68 @@ static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *err
|
|||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ENABLE_OSCE_TRAINING_DATA
|
||||
#define COMPLEXITY_MIN 0
|
||||
#define COMPLEXITY_MAX 10
|
||||
|
||||
#define PACKET_LOSS_PERC_MIN 0
|
||||
#define PACKET_LOSS_PERC_MAX 50
|
||||
#define PACKET_LOSS_PERC_STEP 5
|
||||
|
||||
#define CBR_BITRATE_LIMIT 8000
|
||||
|
||||
#define NUM_BITRATES 102
|
||||
static int bitrates[NUM_BITRATES] = {
|
||||
6000, 6060, 6120, 6180, 6240, 6300, 6360, 6420, 6480,
|
||||
6525, 6561, 6598, 6634, 6670, 6707, 6743, 6780, 6816,
|
||||
6853, 6889, 6926, 6962, 6999, 7042, 7085, 7128, 7171,
|
||||
7215, 7258, 7301, 7344, 7388, 7431, 7474, 7512, 7541,
|
||||
7570, 7599, 7628, 7657, 7686, 7715, 7744, 7773, 7802,
|
||||
7831, 7860, 7889, 7918, 7947, 7976, 8013, 8096, 8179,
|
||||
8262, 8344, 8427, 8511, 8605, 8699, 8792, 8886, 8980,
|
||||
9100, 9227, 9354, 9480, 9561, 9634, 9706, 9779, 9851,
|
||||
9924, 9996, 10161, 10330, 10499, 10698, 10898, 11124, 11378,
|
||||
11575, 11719, 11862, 12014, 12345, 12751, 13195, 13561, 13795,
|
||||
14069, 14671, 15403, 15790, 16371, 17399, 17968, 19382, 20468,
|
||||
22000, 32000, 64000
|
||||
};
|
||||
|
||||
static int randint(int min, int max, int step)
|
||||
{
|
||||
double r = ((double) rand())/ (RAND_MAX + 1.);
|
||||
int d;
|
||||
|
||||
d = ((int) ((max + 1 - min) * r / step) * step) + min;
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
static void new_random_setting(OpusEncoder *enc)
|
||||
{
|
||||
int bitrate_bps;
|
||||
int complexity;
|
||||
int packet_loss_perc;
|
||||
int use_vbr;
|
||||
|
||||
bitrate_bps = bitrates[randint(0, NUM_BITRATES - 1, 1)];
|
||||
complexity = randint(COMPLEXITY_MIN, COMPLEXITY_MAX, 1);
|
||||
packet_loss_perc = randint(PACKET_LOSS_PERC_MIN, PACKET_LOSS_PERC_MAX, PACKET_LOSS_PERC_STEP);
|
||||
use_vbr = bitrate_bps < CBR_BITRATE_LIMIT ? 1 : randint(0, 1, 1);
|
||||
|
||||
if (1)
|
||||
{
|
||||
printf("changing settings to %d\t%d\t%d\t%d\n", bitrate_bps, complexity, packet_loss_perc, use_vbr);
|
||||
}
|
||||
|
||||
opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
|
||||
opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity));
|
||||
opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc));
|
||||
opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int err;
|
||||
|
@ -316,6 +382,10 @@ int main(int argc, char *argv[])
|
|||
int lost_count=0;
|
||||
FILE *packet_loss_file=NULL;
|
||||
int dred_duration=0;
|
||||
#ifdef ENABLE_OSCE_TRAINING_DATA
|
||||
int silk_random_switching = 0;
|
||||
int silk_frame_counter = 0;
|
||||
#endif
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
int blob_len;
|
||||
unsigned char *blob_data;
|
||||
|
@ -546,6 +616,12 @@ int main(int argc, char *argv[])
|
|||
mode_list = celt_hq_test;
|
||||
nb_modes_in_list = 4;
|
||||
args++;
|
||||
#ifdef ENABLE_OSCE_TRAINING_DATA
|
||||
} else if( strcmp( argv[ args ], "-silk_random_switching" ) == 0 ){
|
||||
silk_random_switching = atoi( argv[ args + 1 ] );
|
||||
printf("switching encoding parameters every %dth frame\n", silk_random_switching);
|
||||
args += 2;
|
||||
#endif
|
||||
} else {
|
||||
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
|
||||
print_usage( argv );
|
||||
|
@ -759,6 +835,15 @@ int main(int argc, char *argv[])
|
|||
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
|
||||
frame_size = mode_list[curr_mode][2];
|
||||
}
|
||||
#ifdef ENABLE_OSCE_TRAINING_DATA
|
||||
if (silk_random_switching)
|
||||
{
|
||||
silk_frame_counter += 1;
|
||||
if (silk_frame_counter % silk_random_switching == 0) {
|
||||
new_random_setting(enc);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
num_read = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
|
||||
curr_read = (int)num_read;
|
||||
tot_in += curr_read;
|
||||
|
|
|
@ -50,6 +50,9 @@
|
|||
#else
|
||||
#include "float/structs_FLP.h"
|
||||
#endif
|
||||
#ifdef ENABLE_OSCE_TRAINING_DATA
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#define MAX_ENCODER_BUFFER 480
|
||||
|
||||
|
@ -1693,6 +1696,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
if (st->application == OPUS_APPLICATION_VOIP)
|
||||
{
|
||||
hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
|
||||
|
||||
#ifdef ENABLE_OSCE_TRAINING_DATA
|
||||
/* write out high pass filtered clean signal*/
|
||||
static FILE *fout =NULL;
|
||||
if (fout == NULL)
|
||||
{
|
||||
fout = fopen("clean_hp.s16", "wb");
|
||||
}
|
||||
|
||||
{
|
||||
int idx;
|
||||
opus_int16 tmp;
|
||||
for (idx = 0; idx < frame_size; idx++)
|
||||
{
|
||||
tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);
|
||||
fwrite(&tmp, sizeof(tmp), 1, fout);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
|
||||
}
|
||||
|
@ -2909,7 +2931,9 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
|
|||
{
|
||||
goto bad_arg;
|
||||
}
|
||||
#ifdef ENABLE_DRED
|
||||
ret = dred_encoder_load_model(&st->dred_encoder, data, len);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
|
|
@ -103,7 +103,7 @@ opus_int32 test_dec_api(void)
|
|||
for(c=0;c<4;c++)
|
||||
{
|
||||
i=opus_decoder_get_size(c);
|
||||
if(((c==1||c==2)&&(i<=2048||i>1<<17))||((c!=1&&c!=2)&&i!=0))test_failed();
|
||||
if(((c==1||c==2)&&(i<=2048||i>1<<18))||((c!=1&&c!=2)&&i!=0))test_failed();
|
||||
fprintf(stdout," opus_decoder_get_size(%d)=%d ...............%s OK.\n",c,i,i>0?"":"....");
|
||||
cfgs++;
|
||||
}
|
||||
|
@ -367,7 +367,7 @@ opus_int32 test_msdec_api(void)
|
|||
for(b=-1;b<4;b++)
|
||||
{
|
||||
i=opus_multistream_decoder_get_size(a,b);
|
||||
if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<17)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed();
|
||||
if(((a>0&&b<=a&&b>=0)&&(i<=2048||i>((1<<18)*a)))||((a<1||b>a||b<0)&&i!=0))test_failed();
|
||||
fprintf(stdout," opus_multistream_decoder_get_size(%2d,%2d)=%d %sOK.\n",a,b,i,i>0?"":"... ");
|
||||
cfgs++;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue