mirror of
https://github.com/xiph/opus.git
synced 2025-05-15 07:58:29 +00:00
Adding RTCD for DNN code
Starting with compute_linear()
This commit is contained in:
parent
b0620c0bf9
commit
2e034f6f31
31 changed files with 539 additions and 165 deletions
19
Makefile.am
19
Makefile.am
|
@ -50,18 +50,30 @@ if CPU_X86
|
|||
if HAVE_RTCD
|
||||
CELT_SOURCES += $(CELT_SOURCES_X86_RTCD)
|
||||
SILK_SOURCES += $(SILK_SOURCES_X86_RTCD)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_X86_RTCD)
|
||||
endif
|
||||
endif
|
||||
if HAVE_SSE
|
||||
CELT_SOURCES += $(CELT_SOURCES_SSE)
|
||||
endif
|
||||
if HAVE_SSE2
|
||||
CELT_SOURCES += $(CELT_SOURCES_SSE2)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_SSE2)
|
||||
endif
|
||||
endif
|
||||
if HAVE_SSE4_1
|
||||
CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_SSE4_1)
|
||||
endif
|
||||
endif
|
||||
if HAVE_AVX2
|
||||
CELT_SOURCES += $(CELT_SOURCES_AVX2)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_AVX2)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -398,19 +410,22 @@ $(SSE_OBJ): CFLAGS += $(OPUS_X86_SSE_CFLAGS)
|
|||
endif
|
||||
|
||||
if HAVE_SSE2
|
||||
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo)
|
||||
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo) \
|
||||
$(DNN_SOURCES_SSE2:.c=.lo)
|
||||
$(SSE2_OBJ): CFLAGS += $(OPUS_X86_SSE2_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_SSE4_1
|
||||
SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
|
||||
$(DNN_SOURCES_SSE4_1:.c=.lo) \
|
||||
$(SILK_SOURCES_SSE4_1:.c=.lo) \
|
||||
$(SILK_SOURCES_FIXED_SSE4_1:.c=.lo)
|
||||
$(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_AVX2
|
||||
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo)
|
||||
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) \
|
||||
$(DNN_SOURCES_AVX2:.c=.lo)
|
||||
$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
|
||||
endif
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
# endif
|
||||
|
||||
# if defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
# define MAY_HAVE_AVX2(name) name ## _avx
|
||||
# define MAY_HAVE_AVX2(name) name ## _avx2
|
||||
# else
|
||||
# define MAY_HAVE_AVX2(name) name ## _c
|
||||
# endif
|
||||
|
|
|
@ -42,33 +42,35 @@ static void conv1_cond_init(float *mem, int len, int dilation, int *init)
|
|||
*init = 1;
|
||||
}
|
||||
|
||||
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents)
|
||||
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch)
|
||||
{
|
||||
int i;
|
||||
RDOVAEDecState dec;
|
||||
memset(&dec, 0, sizeof(dec));
|
||||
dred_rdovae_dec_init_states(&dec, model, state);
|
||||
dred_rdovae_dec_init_states(&dec, model, state, arch);
|
||||
for (i = 0; i < 2*nb_latents; i += 2)
|
||||
{
|
||||
dred_rdovae_decode_qframe(
|
||||
&dec,
|
||||
model,
|
||||
&features[2*i*DRED_NUM_FEATURES],
|
||||
&latents[(i/2)*DRED_LATENT_DIM]);
|
||||
&latents[(i/2)*DRED_LATENT_DIM],
|
||||
arch);
|
||||
}
|
||||
}
|
||||
|
||||
void dred_rdovae_dec_init_states(
|
||||
RDOVAEDecState *h, /* io: state buffer handle */
|
||||
const RDOVAEDec *model,
|
||||
const float *initial_state /* i: initial state */
|
||||
const float *initial_state, /* i: initial state */
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float hidden[DEC_HIDDEN_INIT_OUT_SIZE];
|
||||
float state_init[DEC_GRU1_STATE_SIZE+DEC_GRU2_STATE_SIZE+DEC_GRU3_STATE_SIZE+DEC_GRU4_STATE_SIZE+DEC_GRU5_STATE_SIZE];
|
||||
int counter=0;
|
||||
compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH, arch);
|
||||
compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH, arch);
|
||||
OPUS_COPY(h->gru1_state, state_init, DEC_GRU1_STATE_SIZE);
|
||||
counter += DEC_GRU1_STATE_SIZE;
|
||||
OPUS_COPY(h->gru2_state, &state_init[counter], DEC_GRU2_STATE_SIZE);
|
||||
|
@ -86,7 +88,8 @@ void dred_rdovae_decode_qframe(
|
|||
RDOVAEDecState *dec_state, /* io: state buffer handle */
|
||||
const RDOVAEDec *model,
|
||||
float *qframe, /* o: quadruple feature frame (four concatenated frames in reverse order) */
|
||||
const float *input /* i: latent vector */
|
||||
const float *input, /* i: latent vector */
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
|
||||
|
@ -94,43 +97,43 @@ void dred_rdovae_decode_qframe(
|
|||
int output_index = 0;
|
||||
|
||||
/* run decoder stack and concatenate output in buffer */
|
||||
compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_DENSE1_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer);
|
||||
compute_glu(&model->dec_glu1, &buffer[output_index], dec_state->gru1_state);
|
||||
compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu1, &buffer[output_index], dec_state->gru1_state, arch);
|
||||
output_index += DEC_GRU1_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv1_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV1_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer);
|
||||
compute_glu(&model->dec_glu2, &buffer[output_index], dec_state->gru2_state);
|
||||
compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu2, &buffer[output_index], dec_state->gru2_state, arch);
|
||||
output_index += DEC_GRU2_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv2_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV2_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer);
|
||||
compute_glu(&model->dec_glu3, &buffer[output_index], dec_state->gru3_state);
|
||||
compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu3, &buffer[output_index], dec_state->gru3_state, arch);
|
||||
output_index += DEC_GRU3_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv3_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV3_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer);
|
||||
compute_glu(&model->dec_glu4, &buffer[output_index], dec_state->gru4_state);
|
||||
compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu4, &buffer[output_index], dec_state->gru4_state, arch);
|
||||
output_index += DEC_GRU4_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv4_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV4_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer);
|
||||
compute_glu(&model->dec_glu5, &buffer[output_index], dec_state->gru5_state);
|
||||
compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
|
||||
compute_glu(&model->dec_glu5, &buffer[output_index], dec_state->gru5_state, arch);
|
||||
output_index += DEC_GRU5_OUT_SIZE;
|
||||
conv1_cond_init(dec_state->conv5_state, output_index, 1, &dec_state->initialized);
|
||||
compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += DEC_CONV5_OUT_SIZE;
|
||||
|
||||
compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR);
|
||||
compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR, arch);
|
||||
}
|
||||
|
|
|
@ -46,8 +46,8 @@ struct RDOVAEDecStruct {
|
|||
float conv5_state[DEC_CONV5_STATE_SIZE];
|
||||
};
|
||||
|
||||
void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state);
|
||||
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z);
|
||||
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents);
|
||||
void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state, int arch);
|
||||
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z, int arch);
|
||||
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -50,7 +50,8 @@ void dred_rdovae_encode_dframe(
|
|||
const RDOVAEEnc *model,
|
||||
float *latents, /* o: latent vector */
|
||||
float *initial_state, /* o: initial state */
|
||||
const float *input /* i: double feature frame (concatenated) */
|
||||
const float *input, /* i: double feature frame (concatenated) */
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float padded_latents[DRED_PADDED_LATENT_DIM];
|
||||
|
@ -61,49 +62,49 @@ void dred_rdovae_encode_dframe(
|
|||
int output_index = 0;
|
||||
|
||||
/* run encoder stack and concatenate output in buffer*/
|
||||
compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_DENSE1_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer);
|
||||
compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru1_state, ENC_GRU1_OUT_SIZE);
|
||||
output_index += ENC_GRU1_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv1_state, output_index, 1, &enc_state->initialized);
|
||||
compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV1_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer);
|
||||
compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru2_state, ENC_GRU2_OUT_SIZE);
|
||||
output_index += ENC_GRU2_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv2_state, output_index, 2, &enc_state->initialized);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV2_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer);
|
||||
compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru3_state, ENC_GRU3_OUT_SIZE);
|
||||
output_index += ENC_GRU3_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv3_state, output_index, 2, &enc_state->initialized);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV3_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer);
|
||||
compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru4_state, ENC_GRU4_OUT_SIZE);
|
||||
output_index += ENC_GRU4_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv4_state, output_index, 2, &enc_state->initialized);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV4_OUT_SIZE;
|
||||
|
||||
compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer);
|
||||
compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer, arch);
|
||||
OPUS_COPY(&buffer[output_index], enc_state->gru5_state, ENC_GRU5_OUT_SIZE);
|
||||
output_index += ENC_GRU5_OUT_SIZE;
|
||||
conv1_cond_init(enc_state->conv5_state, output_index, 2, &enc_state->initialized);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH);
|
||||
compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
|
||||
output_index += ENC_CONV5_OUT_SIZE;
|
||||
|
||||
compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR);
|
||||
compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR, arch);
|
||||
OPUS_COPY(latents, padded_latents, DRED_LATENT_DIM);
|
||||
|
||||
/* next, calculate initial state */
|
||||
compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR);
|
||||
compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH, arch);
|
||||
compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR, arch);
|
||||
OPUS_COPY(initial_state, padded_state, DRED_STATE_DIM);
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ struct RDOVAEEncStruct {
|
|||
float conv5_state[2*ENC_CONV5_STATE_SIZE];
|
||||
};
|
||||
|
||||
void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input);
|
||||
void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input, int arch);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include "lpcnet.h"
|
||||
#include "lpcnet_private.h"
|
||||
#include "os_support.h"
|
||||
#include "cpu_support.h"
|
||||
|
||||
|
||||
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
|
||||
|
@ -135,7 +136,9 @@ int main(int argc, char **argv) {
|
|||
FILE *fnoise = NULL;
|
||||
float noise_gain = 0;
|
||||
long noise_size=0;
|
||||
int arch;
|
||||
srand(getpid());
|
||||
arch = opus_select_arch();
|
||||
st = lpcnet_encoder_create();
|
||||
argv0=argv[0];
|
||||
if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
|
||||
|
@ -244,7 +247,7 @@ int main(int argc, char **argv) {
|
|||
for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
|
||||
/* PCM is delayed by 1/2 frame to make the features centered on the frames. */
|
||||
for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
|
||||
compute_frame_features(st, x);
|
||||
compute_frame_features(st, x, arch);
|
||||
|
||||
if (fpcm) {
|
||||
compute_noise(noisebuf, noise_std);
|
||||
|
|
35
dnn/fargan.c
35
dnn/fargan.c
|
@ -36,6 +36,7 @@
|
|||
#include "pitch.h"
|
||||
#include "nnet.h"
|
||||
#include "lpcnet_private.h"
|
||||
#include "cpu_support.h"
|
||||
|
||||
#define FARGAN_FEATURES (NB_FEATURES)
|
||||
|
||||
|
@ -52,9 +53,9 @@ static void compute_fargan_cond(FARGANState *st, float *cond, const float *featu
|
|||
OPUS_COPY(&dense_in[NB_FEATURES], &model->cond_net_pembed.float_weights[IMAX(0,IMIN(period-32, 224))*COND_NET_PEMBED_OUT_SIZE], COND_NET_PEMBED_OUT_SIZE);
|
||||
OPUS_COPY(dense_in, features, NB_FEATURES);
|
||||
|
||||
compute_generic_dense(&model->cond_net_fdense1, conv1_in, dense_in, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->cond_net_fconv1, conv2_in, st->cond_conv1_state, conv1_in, COND_NET_FCONV1_IN_SIZE, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->cond_net_fconv2, cond, st->cond_conv2_state, conv2_in, COND_NET_FCONV2_IN_SIZE, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->cond_net_fdense1, conv1_in, dense_in, ACTIVATION_TANH, st->arch);
|
||||
compute_generic_conv1d(&model->cond_net_fconv1, conv2_in, st->cond_conv1_state, conv1_in, COND_NET_FCONV1_IN_SIZE, ACTIVATION_TANH, st->arch);
|
||||
compute_generic_conv1d(&model->cond_net_fconv2, cond, st->cond_conv2_state, conv2_in, COND_NET_FCONV2_IN_SIZE, ACTIVATION_TANH, st->arch);
|
||||
}
|
||||
|
||||
static void fargan_deemphasis(float *pcm, float *deemph_mem) {
|
||||
|
@ -84,7 +85,7 @@ static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond,
|
|||
celt_assert(st->cont_initialized);
|
||||
model = &st->model;
|
||||
|
||||
compute_generic_dense(&model->sig_net_cond_gain_dense, &gain, cond, ACTIVATION_LINEAR);
|
||||
compute_generic_dense(&model->sig_net_cond_gain_dense, &gain, cond, ACTIVATION_LINEAR, st->arch);
|
||||
gain = exp(gain);
|
||||
gain_1 = 1.f/(1e-5f + gain);
|
||||
|
||||
|
@ -100,26 +101,26 @@ static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond,
|
|||
OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE], pred, FARGAN_SUBFRAME_SIZE+4);
|
||||
OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE+FARGAN_SUBFRAME_SIZE+4], prev, FARGAN_SUBFRAME_SIZE);
|
||||
|
||||
compute_generic_conv1d(&model->sig_net_fwc0_conv, gru1_in, st->fwc0_mem, fwc0_in, SIG_NET_INPUT_SIZE, ACTIVATION_TANH);
|
||||
compute_generic_conv1d(&model->sig_net_fwc0_conv, gru1_in, st->fwc0_mem, fwc0_in, SIG_NET_INPUT_SIZE, ACTIVATION_TANH, st->arch);
|
||||
celt_assert(SIG_NET_FWC0_GLU_GATE_OUT_SIZE == model->sig_net_fwc0_glu_gate.nb_outputs);
|
||||
compute_glu(&model->sig_net_fwc0_glu_gate, gru1_in, gru1_in);
|
||||
compute_glu(&model->sig_net_fwc0_glu_gate, gru1_in, gru1_in, st->arch);
|
||||
|
||||
compute_generic_dense(&model->sig_net_gain_dense_out, pitch_gate, gru1_in, ACTIVATION_SIGMOID);
|
||||
compute_generic_dense(&model->sig_net_gain_dense_out, pitch_gate, gru1_in, ACTIVATION_SIGMOID, st->arch);
|
||||
|
||||
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+i] = pitch_gate[0]*pred[i+2];
|
||||
OPUS_COPY(&gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
|
||||
compute_generic_gru(&model->sig_net_gru1_input, &model->sig_net_gru1_recurrent, st->gru1_state, gru1_in);
|
||||
compute_glu(&model->sig_net_gru1_glu_gate, gru2_in, st->gru1_state);
|
||||
compute_generic_gru(&model->sig_net_gru1_input, &model->sig_net_gru1_recurrent, st->gru1_state, gru1_in, st->arch);
|
||||
compute_glu(&model->sig_net_gru1_glu_gate, gru2_in, st->gru1_state, st->arch);
|
||||
|
||||
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru2_in[SIG_NET_GRU1_OUT_SIZE+i] = pitch_gate[1]*pred[i+2];
|
||||
OPUS_COPY(&gru2_in[SIG_NET_GRU1_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
|
||||
compute_generic_gru(&model->sig_net_gru2_input, &model->sig_net_gru2_recurrent, st->gru2_state, gru2_in);
|
||||
compute_glu(&model->sig_net_gru2_glu_gate, gru3_in, st->gru2_state);
|
||||
compute_generic_gru(&model->sig_net_gru2_input, &model->sig_net_gru2_recurrent, st->gru2_state, gru2_in, st->arch);
|
||||
compute_glu(&model->sig_net_gru2_glu_gate, gru3_in, st->gru2_state, st->arch);
|
||||
|
||||
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru3_in[SIG_NET_GRU2_OUT_SIZE+i] = pitch_gate[2]*pred[i+2];
|
||||
OPUS_COPY(&gru3_in[SIG_NET_GRU2_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
|
||||
compute_generic_gru(&model->sig_net_gru3_input, &model->sig_net_gru3_recurrent, st->gru3_state, gru3_in);
|
||||
compute_glu(&model->sig_net_gru3_glu_gate, &skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE], st->gru3_state);
|
||||
compute_generic_gru(&model->sig_net_gru3_input, &model->sig_net_gru3_recurrent, st->gru3_state, gru3_in, st->arch);
|
||||
compute_glu(&model->sig_net_gru3_glu_gate, &skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE], st->gru3_state, st->arch);
|
||||
|
||||
OPUS_COPY(skip_cat, gru2_in, SIG_NET_GRU1_OUT_SIZE);
|
||||
OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE], gru3_in, SIG_NET_GRU2_OUT_SIZE);
|
||||
|
@ -127,10 +128,10 @@ static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond,
|
|||
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+i] = pitch_gate[3]*pred[i+2];
|
||||
OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
|
||||
|
||||
compute_generic_dense(&model->sig_net_skip_dense, skip_out, skip_cat, ACTIVATION_TANH);
|
||||
compute_glu(&model->sig_net_skip_glu_gate, skip_out, skip_out);
|
||||
compute_generic_dense(&model->sig_net_skip_dense, skip_out, skip_cat, ACTIVATION_TANH, st->arch);
|
||||
compute_glu(&model->sig_net_skip_glu_gate, skip_out, skip_out, st->arch);
|
||||
|
||||
compute_generic_dense(&model->sig_net_sig_dense_out, pcm, skip_out, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->sig_net_sig_dense_out, pcm, skip_out, ACTIVATION_TANH, st->arch);
|
||||
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) pcm[i] *= gain;
|
||||
|
||||
OPUS_MOVE(st->pitch_buf, &st->pitch_buf[FARGAN_SUBFRAME_SIZE], PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE);
|
||||
|
@ -174,13 +175,13 @@ void fargan_init(FARGANState *st)
|
|||
{
|
||||
int ret;
|
||||
OPUS_CLEAR(st, 1);
|
||||
st->arch = opus_select_arch();
|
||||
#ifndef USE_WEIGHTS_FILE
|
||||
ret = init_fargan(&st->model, fargan_arrays);
|
||||
#else
|
||||
ret = 0;
|
||||
#endif
|
||||
celt_assert(ret == 0);
|
||||
/* FIXME: perform arch detection. */
|
||||
}
|
||||
|
||||
int fargan_load_model(FARGANState *st, const unsigned char *data, int len) {
|
||||
|
|
|
@ -120,7 +120,7 @@ int lpcnet_encode(LPCNetEncState *st, const opus_int16 *pcm, unsigned char *buf)
|
|||
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Four feature vectors
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]);
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch);
|
||||
|
||||
|
||||
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
|
||||
|
@ -129,7 +129,7 @@ int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *p
|
|||
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Four feature vectors
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES]);
|
||||
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch);
|
||||
|
||||
/** Gets the size of an <code>LPCNetState</code> structure.
|
||||
* @returns The size in bytes.
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include "freq.h"
|
||||
#include "os_support.h"
|
||||
#include "fargan.h"
|
||||
#include "cpu_support.h"
|
||||
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
# if __unix__
|
||||
|
@ -99,12 +100,14 @@ void usage(void) {
|
|||
|
||||
int main(int argc, char **argv) {
|
||||
int mode=0;
|
||||
int arch;
|
||||
FILE *fin, *fout;
|
||||
#ifdef USE_WEIGHTS_FILE
|
||||
int len;
|
||||
unsigned char *data;
|
||||
const char *filename = "weights_blob.bin";
|
||||
#endif
|
||||
arch = opus_select_arch();
|
||||
if (argc < 4) usage();
|
||||
if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
|
||||
else if (strcmp(argv[1], "-fargan-synthesis") == 0) mode=MODE_FARGAN_SYNTHESIS;
|
||||
|
@ -137,7 +140,7 @@ int main(int argc, char **argv) {
|
|||
size_t ret;
|
||||
ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
|
||||
if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;
|
||||
lpcnet_compute_single_frame_features(net, pcm, features);
|
||||
lpcnet_compute_single_frame_features(net, pcm, features, arch);
|
||||
fwrite(features, sizeof(float), NB_TOTAL_FEATURES, fout);
|
||||
}
|
||||
lpcnet_encoder_destroy(net);
|
||||
|
|
|
@ -95,7 +95,7 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const
|
|||
|
||||
#define celt_log10(x) (0.3010299957f*celt_log2(x))
|
||||
|
||||
void compute_frame_features(LPCNetEncState *st, const float *in) {
|
||||
void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
|
||||
float aligned_in[FRAME_SIZE];
|
||||
int i;
|
||||
float Ly[NB_BANDS];
|
||||
|
@ -142,7 +142,7 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
|
|||
OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
|
||||
OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
|
||||
OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
|
||||
celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, st->arch);
|
||||
celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, arch);
|
||||
for (i=0;i<FRAME_SIZE;i++) {
|
||||
st->exc_buf[PITCH_MAX_PERIOD+i] = st->lp_buf[PITCH_MAX_PERIOD+i] + .7f*st->pitch_filt;
|
||||
st->pitch_filt = st->lp_buf[PITCH_MAX_PERIOD+i];
|
||||
|
@ -152,7 +152,7 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
|
|||
{
|
||||
double ener1;
|
||||
float *buf = st->exc_buf;
|
||||
celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, st->arch);
|
||||
celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
|
||||
ener0 = celt_inner_prod_c(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE);
|
||||
ener1 = celt_inner_prod_c(&buf[0], &buf[0], FRAME_SIZE-1);
|
||||
/*printf("%f\n", st->frame_weight[sub]);*/
|
||||
|
@ -165,7 +165,7 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
|
|||
}
|
||||
/*printf("\n");*/
|
||||
}
|
||||
st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features);
|
||||
st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
|
||||
}
|
||||
|
||||
void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
|
||||
|
@ -196,26 +196,26 @@ void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
|
|||
}
|
||||
}
|
||||
|
||||
static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES]) {
|
||||
static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch) {
|
||||
preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
|
||||
compute_frame_features(st, x);
|
||||
compute_frame_features(st, x, arch);
|
||||
process_single_frame(st, NULL);
|
||||
OPUS_COPY(features, &st->features[0], NB_TOTAL_FEATURES);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]) {
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch) {
|
||||
int i;
|
||||
float x[FRAME_SIZE];
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
|
||||
lpcnet_compute_single_frame_features_impl(st, x, features);
|
||||
lpcnet_compute_single_frame_features_impl(st, x, features, arch);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES]) {
|
||||
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch) {
|
||||
int i;
|
||||
float x[FRAME_SIZE];
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
|
||||
lpcnet_compute_single_frame_features_impl(st, x, features);
|
||||
lpcnet_compute_single_frame_features_impl(st, x, features, arch);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "plc_data.h"
|
||||
#include "os_support.h"
|
||||
#include "common.h"
|
||||
#include "cpu_support.h"
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.141592653
|
||||
|
@ -54,6 +55,7 @@ void lpcnet_plc_reset(LPCNetPLCState *st) {
|
|||
|
||||
int lpcnet_plc_init(LPCNetPLCState *st) {
|
||||
int ret;
|
||||
st->arch = opus_select_arch();
|
||||
fargan_init(&st->fargan);
|
||||
lpcnet_encoder_init(&st->enc);
|
||||
st->analysis_pos = PLC_BUF_SIZE;
|
||||
|
@ -109,10 +111,10 @@ static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) {
|
|||
float dense_out[PLC_DENSE1_OUT_SIZE];
|
||||
PLCNetState *net = &st->plc_net;
|
||||
celt_assert(st->loaded);
|
||||
_lpcnet_compute_dense(&st->model.plc_dense1, dense_out, in);
|
||||
compute_gruB(&st->model.plc_gru1, zeros, net->plc_gru1_state, dense_out);
|
||||
compute_gruB(&st->model.plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state);
|
||||
_lpcnet_compute_dense(&st->model.plc_out, out, net->plc_gru2_state);
|
||||
_lpcnet_compute_dense(&st->model.plc_dense1, dense_out, in, st->arch);
|
||||
compute_gruB(&st->model.plc_gru1, zeros, net->plc_gru1_state, dense_out, st->arch);
|
||||
compute_gruB(&st->model.plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state, st->arch);
|
||||
_lpcnet_compute_dense(&st->model.plc_out, out, net->plc_gru2_state, st->arch);
|
||||
}
|
||||
|
||||
static int get_fec_or_pred(LPCNetPLCState *st, float *out) {
|
||||
|
@ -164,7 +166,7 @@ int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) {
|
|||
float plc_features[2*NB_BANDS+NB_FEATURES+1];
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] = 32768.f*st->pcm[st->analysis_pos+i];
|
||||
burg_cepstral_analysis(plc_features, x);
|
||||
lpcnet_compute_single_frame_features_float(&st->enc, x, st->features);
|
||||
lpcnet_compute_single_frame_features_float(&st->enc, x, st->features, st->arch);
|
||||
if ((st->analysis_gap && count > 0) || count > 1) {
|
||||
queue_features(st, st->features);
|
||||
OPUS_COPY(&plc_features[2*NB_BANDS], st->features, NB_FEATURES);
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
struct LPCNetEncState{
|
||||
PitchDNNState pitchdnn;
|
||||
int arch;
|
||||
float analysis_mem[OVERLAP_SIZE];
|
||||
float mem_preemph;
|
||||
kiss_fft_cpx prev_if[PITCH_IF_MAX_FREQ];
|
||||
|
@ -67,7 +66,7 @@ struct LPCNetPLCState {
|
|||
|
||||
void preemphasis(float *y, float *mem, const float *x, float coef, int N);
|
||||
|
||||
void compute_frame_features(LPCNetEncState *st, const float *in);
|
||||
void compute_frame_features(LPCNetEncState *st, const float *in, int arch);
|
||||
|
||||
void lpcnet_reset_signal(LPCNetState *lpcnet);
|
||||
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
|
||||
|
@ -79,7 +78,6 @@ void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N,
|
|||
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload);
|
||||
void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const opus_int16 *pcm_in, opus_int16 *output, int N);
|
||||
void process_single_frame(LPCNetEncState *st, FILE *ffeat);
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]);
|
||||
|
||||
void process_single_frame(LPCNetEncState *st, FILE *ffeat);
|
||||
|
||||
|
|
64
dnn/nnet.c
64
dnn/nnet.c
|
@ -69,50 +69,16 @@ static OPUS_INLINE float relu(float x)
|
|||
return x < 0 ? 0 : x;
|
||||
}
|
||||
|
||||
static void compute_linear(const LinearLayer *linear, float *out, const float *in)
|
||||
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
|
||||
{
|
||||
int i, M, N;
|
||||
const float *bias;
|
||||
celt_assert(in != out);
|
||||
bias = linear->bias;
|
||||
M = linear->nb_inputs;
|
||||
N = linear->nb_outputs;
|
||||
if (linear->float_weights != NULL) {
|
||||
if (linear->weights_idx != NULL) sparse_sgemv8x4(out, linear->float_weights, linear->weights_idx, N, in);
|
||||
else sgemv(out, linear->float_weights, N, M, N, in);
|
||||
} else if (linear->weights != NULL) {
|
||||
if (linear->weights_idx != NULL) sparse_cgemv8x4(out, linear->weights, linear->weights_idx, linear->scale, N, M, in);
|
||||
else cgemv8x4(out, linear->weights, linear->scale, N, M, in);
|
||||
/* Only use SU biases for integer matrices on SU archs. */
|
||||
#ifdef USE_SU_BIAS
|
||||
bias = linear->subias;
|
||||
#endif
|
||||
}
|
||||
else OPUS_CLEAR(out, N);
|
||||
if (bias != NULL) {
|
||||
for (i=0;i<N;i++) out[i] += bias[i];
|
||||
}
|
||||
if (linear->diag) {
|
||||
/* Diag is only used for GRU recurrent weights. */
|
||||
celt_assert(3*M == N);
|
||||
for (i=0;i<M;i++) {
|
||||
out[i] += linear->diag[i]*in[i];
|
||||
out[i+M] += linear->diag[i+M]*in[i];
|
||||
out[i+2*M] += linear->diag[i+2*M]*in[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation)
|
||||
{
|
||||
compute_linear(layer, output, input);
|
||||
compute_linear(layer, output, input, arch);
|
||||
compute_activation(output, output, layer->nb_outputs, activation);
|
||||
}
|
||||
|
||||
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_NEURONS), DRED_MAX_RNN_NEURONS)
|
||||
|
||||
|
||||
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in)
|
||||
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
|
||||
{
|
||||
int i;
|
||||
int N;
|
||||
|
@ -129,8 +95,8 @@ void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *re
|
|||
h = &zrh[2*N];
|
||||
celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
|
||||
celt_assert(in != state);
|
||||
compute_linear(input_weights, zrh, in);
|
||||
compute_linear(recurrent_weights, recur, state);
|
||||
compute_linear(input_weights, zrh, in, arch);
|
||||
compute_linear(recurrent_weights, recur, state, arch);
|
||||
for (i=0;i<2*N;i++)
|
||||
zrh[i] += recur[i];
|
||||
compute_activation(zrh, zrh, 2*N, ACTIVATION_SIGMOID);
|
||||
|
@ -143,12 +109,12 @@ void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *re
|
|||
state[i] = h[i];
|
||||
}
|
||||
|
||||
void compute_glu(const LinearLayer *layer, float *output, const float *input)
|
||||
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch)
|
||||
{
|
||||
int i;
|
||||
float act2[MAX_INPUTS];
|
||||
celt_assert(layer->nb_inputs == layer->nb_outputs);
|
||||
compute_linear(layer, act2, input);
|
||||
compute_linear(layer, act2, input, arch);
|
||||
compute_activation(act2, act2, layer->nb_outputs, ACTIVATION_SIGMOID);
|
||||
if (input == output) {
|
||||
/* Give a vectorization hint to the compiler for the in-place case. */
|
||||
|
@ -194,7 +160,7 @@ void compute_activation(float *output, const float *input, int N, int activation
|
|||
}
|
||||
}
|
||||
|
||||
void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *input)
|
||||
void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *input, int arch)
|
||||
{
|
||||
LinearLayer matrix;
|
||||
celt_assert(input != output);
|
||||
|
@ -207,7 +173,7 @@ void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *
|
|||
matrix.nb_inputs = layer->nb_inputs;
|
||||
matrix.nb_outputs = layer->nb_neurons;
|
||||
matrix.scale = NULL;
|
||||
compute_linear(&matrix, output, input);
|
||||
compute_linear(&matrix, output, input, arch);
|
||||
compute_activation(output, output, layer->nb_neurons, layer->activation);
|
||||
}
|
||||
|
||||
|
@ -218,7 +184,7 @@ void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *
|
|||
#endif
|
||||
#define MAX_IDX_SIZE 8192
|
||||
|
||||
void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input)
|
||||
void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input, int arch)
|
||||
{
|
||||
LinearLayer in_matrix, rec_matrix;
|
||||
int i, M, N;
|
||||
|
@ -262,25 +228,25 @@ void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *stat
|
|||
rec_matrix.float_weights = NULL;
|
||||
#endif
|
||||
rec_matrix.weights_idx = NULL;
|
||||
compute_generic_gru(&in_matrix, &rec_matrix, state, input);
|
||||
compute_generic_gru(&in_matrix, &rec_matrix, state, input, arch);
|
||||
}
|
||||
|
||||
|
||||
#define MAX_CONV_INPUTS_ALL DRED_MAX_CONV_INPUTS
|
||||
|
||||
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation)
|
||||
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch)
|
||||
{
|
||||
float tmp[MAX_CONV_INPUTS_ALL];
|
||||
celt_assert(input != output);
|
||||
celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
|
||||
OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
|
||||
OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
|
||||
compute_linear(layer, output, tmp);
|
||||
compute_linear(layer, output, tmp, arch);
|
||||
compute_activation(output, output, layer->nb_outputs, activation);
|
||||
OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
|
||||
}
|
||||
|
||||
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation)
|
||||
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch)
|
||||
{
|
||||
float tmp[MAX_CONV_INPUTS_ALL];
|
||||
int ksize = layer->nb_inputs/input_size;
|
||||
|
@ -290,7 +256,7 @@ void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, fl
|
|||
if (dilation==1) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
|
||||
else for (i=0;i<ksize-1;i++) OPUS_COPY(&tmp[i*input_size], &mem[i*input_size*dilation], input_size);
|
||||
OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
|
||||
compute_linear(layer, output, tmp);
|
||||
compute_linear(layer, output, tmp, arch);
|
||||
compute_activation(output, output, layer->nb_outputs, activation);
|
||||
if (dilation==1) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
|
||||
else {
|
||||
|
|
37
dnn/nnet.h
37
dnn/nnet.h
|
@ -126,18 +126,18 @@ typedef struct {
|
|||
int dim;
|
||||
} EmbeddingLayer;
|
||||
|
||||
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation);
|
||||
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in);
|
||||
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation);
|
||||
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation);
|
||||
void compute_glu(const LinearLayer *layer, float *output, const float *input);
|
||||
void compute_gated_activation(const LinearLayer *layer, float *output, const float *input, int activation);
|
||||
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
|
||||
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch);
|
||||
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch);
|
||||
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch);
|
||||
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch);
|
||||
void compute_gated_activation(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
|
||||
|
||||
void compute_activation(float *output, const float *input, int N, int activation);
|
||||
|
||||
void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *input);
|
||||
void _lpcnet_compute_dense(const DenseLayer *layer, float *output, const float *input, int arch);
|
||||
|
||||
void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input);
|
||||
void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *state, const float *input, int arch);
|
||||
|
||||
|
||||
|
||||
|
@ -189,4 +189,25 @@ int gru_init(GRULayer *layer, const WeightArray *arrays,
|
|||
void compute_conv2d(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
|
||||
|
||||
|
||||
/* Plain-C build of the linear-layer kernel. The definition is generated from
   nnet_arch.h by nnet_default.c, which sets RTCD_ARCH to c. */
void compute_linear_c(const LinearLayer *linear, float *out, const float *in);
|
||||
|
||||
/* On x86 builds that may have SSE2, pull in the x86 header, which can define
   OVERRIDE_COMPUTE_LINEAR together with its own compute_linear() macro. */
#if defined(OPUS_X86_MAY_HAVE_SSE2)
|
||||
#include "x86/dnn_x86.h"
|
||||
#endif
|
||||
|
||||
/* Default dispatch: when no arch-specific header has claimed compute_linear(),
   the arch argument is evaluated and discarded and the C version is called. */
#ifndef OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
#if defined(_MSC_VER)
|
||||
#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
|
||||
#else
|
||||
#warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif /* NNET_H_ */
|
||||
|
|
76
dnn/nnet_arch.h
Normal file
76
dnn/nnet_arch.h
Normal file
|
@ -0,0 +1,76 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef NNET_ARCH_H
|
||||
#define NNET_ARCH_H
|
||||
|
||||
#include "nnet.h"
|
||||
#include "arch.h"
|
||||
#include "os_support.h"
|
||||
#include "vec.h"
|
||||
|
||||
/* Token pasting is done through two macro levels so that the arguments are
   macro-expanded before ## is applied; pasting directly would glue the literal
   text RTCD_ARCH onto the name instead of its value. */
#define CAT_SUFFIX2(a,b) a ## b
|
||||
#define CAT_SUFFIX(a,b) CAT_SUFFIX2(a, b)
|
||||
|
||||
/* Appends the value of RTCD_ARCH to name. The .c file that includes this
   header is expected to define RTCD_ARCH first (e.g. c, sse2, sse4_1, avx2). */
#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
|
||||
|
||||
/* Linear layer template, emitted as compute_linear_<RTCD_ARCH>.
   Fills out[0..nb_outputs-1] from in[] using whichever weight set the layer
   carries, then adds the optional bias and the optional diagonal term.
     linear: layer description (weights, optional index/scale/bias/diag).
     out:    destination, nb_outputs floats; must not alias in.
     in:     source vector (nb_inputs floats, judging by the diag loop below).
   The matrix-vector helpers are defined elsewhere (presumably vec.h, included
   above) -- their exact semantics are not visible here. */
void RTCD_SUF(compute_linear_) (const LinearLayer *linear, float *out, const float *in)
|
||||
{
|
||||
int i, M, N;
|
||||
const float *bias;
|
||||
/* In-place operation is not supported. */
celt_assert(in != out);
|
||||
bias = linear->bias;
|
||||
M = linear->nb_inputs;
|
||||
N = linear->nb_outputs;
|
||||
/* Float weights take priority; weights_idx selects the sparse variant. */
if (linear->float_weights != NULL) {
|
||||
if (linear->weights_idx != NULL) sparse_sgemv8x4(out, linear->float_weights, linear->weights_idx, N, in);
|
||||
else sgemv(out, linear->float_weights, N, M, N, in);
|
||||
/* Otherwise use the integer weights together with their scale. */
} else if (linear->weights != NULL) {
|
||||
if (linear->weights_idx != NULL) sparse_cgemv8x4(out, linear->weights, linear->weights_idx, linear->scale, N, M, in);
|
||||
else cgemv8x4(out, linear->weights, linear->scale, N, M, in);
|
||||
/* Only use SU biases for integer matrices on SU archs. */
|
||||
#ifdef USE_SU_BIAS
|
||||
bias = linear->subias;
|
||||
#endif
|
||||
}
|
||||
/* No weights at all: the output is just bias (+ diag), starting from zero. */
else OPUS_CLEAR(out, N);
|
||||
if (bias != NULL) {
|
||||
for (i=0;i<N;i++) out[i] += bias[i];
|
||||
}
|
||||
if (linear->diag) {
|
||||
/* Diag is only used for GRU recurrent weights. */
|
||||
/* Three output segments of M values each, all scaled from the same in[i]. */
celt_assert(3*M == N);
|
||||
for (i=0;i<M;i++) {
|
||||
out[i] += linear->diag[i]*in[i];
|
||||
out[i+M] += linear->diag[i+M]*in[i];
|
||||
out[i+2*M] += linear->diag[i+2*M]*in[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
35
dnn/nnet_default.c
Normal file
35
dnn/nnet_default.c
Normal file
|
@ -0,0 +1,35 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#define RTCD_ARCH c
|
||||
|
||||
#include "nnet_arch.h"
|
|
@ -12,7 +12,8 @@
|
|||
float compute_pitchdnn(
|
||||
PitchDNNState *st,
|
||||
const float *if_features,
|
||||
const float *xcorr_features
|
||||
const float *xcorr_features,
|
||||
int arch
|
||||
)
|
||||
{
|
||||
float if1_out[DENSE_IF_UPSAMPLER_1_OUT_SIZE];
|
||||
|
@ -28,16 +29,16 @@ float compute_pitchdnn(
|
|||
float count=0;
|
||||
PitchDNN *model = &st->model;
|
||||
/* IF */
|
||||
compute_generic_dense(&model->dense_if_upsampler_1, if1_out, if_features, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->dense_if_upsampler_2, &downsampler_in[NB_XCORR_FEATURES], if1_out, ACTIVATION_TANH);
|
||||
compute_generic_dense(&model->dense_if_upsampler_1, if1_out, if_features, ACTIVATION_TANH, arch);
|
||||
compute_generic_dense(&model->dense_if_upsampler_2, &downsampler_in[NB_XCORR_FEATURES], if1_out, ACTIVATION_TANH, arch);
|
||||
/* xcorr*/
|
||||
OPUS_COPY(&conv1_tmp1[1], xcorr_features, NB_XCORR_FEATURES);
|
||||
compute_conv2d(&model->conv2d_1, &conv1_tmp2[1], st->xcorr_mem1, conv1_tmp1, NB_XCORR_FEATURES, NB_XCORR_FEATURES+2, ACTIVATION_TANH);
|
||||
compute_conv2d(&model->conv2d_2, downsampler_in, st->xcorr_mem2, conv1_tmp2, NB_XCORR_FEATURES, NB_XCORR_FEATURES, ACTIVATION_TANH);
|
||||
|
||||
compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH);
|
||||
compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out);
|
||||
compute_generic_dense(&model->dense_final_upsampler, output, st->gru_state, ACTIVATION_LINEAR);
|
||||
compute_generic_dense(&model->dense_downsampler, downsampler_out, downsampler_in, ACTIVATION_TANH, arch);
|
||||
compute_generic_gru(&model->gru_1_input, &model->gru_1_recurrent, st->gru_state, downsampler_out, arch);
|
||||
compute_generic_dense(&model->dense_final_upsampler, output, st->gru_state, ACTIVATION_LINEAR, arch);
|
||||
for (i=0;i<180;i++) {
|
||||
if (output[i] > maxval) {
|
||||
pos = i;
|
||||
|
@ -65,7 +66,6 @@ void pitchdnn_init(PitchDNNState *st)
|
|||
ret = 0;
|
||||
#endif
|
||||
celt_assert(ret == 0);
|
||||
/* FIXME: perform arch detection. */
|
||||
}
|
||||
|
||||
int pitchdnn_load_model(PitchDNNState *st, const unsigned char *data, int len) {
|
||||
|
|
|
@ -27,7 +27,8 @@ int pitchdnn_load_model(PitchDNNState *st, const unsigned char *data, int len);
|
|||
float compute_pitchdnn(
|
||||
PitchDNNState *st,
|
||||
const float *if_features,
|
||||
const float *xcorr_features
|
||||
const float *xcorr_features,
|
||||
int arch
|
||||
);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -655,11 +655,6 @@ static inline mm256i_emu opus_mm256_dpbusds_epi32(mm256i_emu src, mm256i_emu a,
|
|||
return res;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
|
||||
#else
|
||||
#warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
|
|
78
dnn/x86/dnn_x86.h
Normal file
78
dnn/x86/dnn_x86.h
Normal file
|
@ -0,0 +1,78 @@
|
|||
/* Copyright (c) 2011-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DNN_X86_H
|
||||
#define DNN_X86_H
|
||||
|
||||
#include "cpu_support.h"
|
||||
#include "opus_types.h"
|
||||
|
||||
/* Per-ISA variants of the linear-layer kernel. All share the signature of
   compute_linear_c(). LinearLayer is not declared by this header's own
   includes, so it must already be visible at the point of inclusion. */
#if defined(OPUS_X86_MAY_HAVE_SSE2)
|
||||
void compute_linear_sse2(const LinearLayer *linear, float *out, const float *in);
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
void compute_linear_sse4_1(const LinearLayer *linear, float *out, const float *in);
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
void compute_linear_avx2(const LinearLayer *linear, float *out, const float *in);
|
||||
#endif
|
||||
|
||||
|
||||
/* Selection of compute_linear(). Each branch defines OVERRIDE_COMPUTE_LINEAR
   so that a later generic fallback guarded by that macro is skipped.
   Case 1: AVX2 is presumed at build time -> call the AVX2 variant directly. */
#if defined(OPUS_X86_PRESUME_AVX2)
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_avx2(linear, out, in))
|
||||
|
||||
/* Case 2: SSE4.1 is presumed and no AVX2 variant may exist -> direct call. */
#elif defined(OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse4_1(linear, out, in))
|
||||
|
||||
/* Case 3: SSE2 is presumed and no higher variant may exist -> direct call. */
#elif defined(OPUS_X86_PRESUME_SSE2) && !defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_sse2(linear, out, in))
|
||||
|
||||
/* Case 4: run-time CPU detection -> index a function-pointer table by arch. */
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_X86_MAY_HAVE_AVX2) || defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2))
|
||||
|
||||
/* Table with OPUS_ARCHMASK + 1 entries, defined in another translation unit
   (presumably x86/x86_dnn_map.c -- confirm). */
extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const LinearLayer *linear,
|
||||
float *out,
|
||||
const float *in
|
||||
);
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) \
|
||||
((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif /* DNN_X86_H */
|
38
dnn/x86/nnet_avx2.c
Normal file
38
dnn/x86/nnet_avx2.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifndef __AVX2__
|
||||
#error nnet_avx2.c is being compiled without AVX2 enabled
|
||||
#endif
|
||||
|
||||
#define RTCD_ARCH avx2
|
||||
|
||||
#include "nnet_arch.h"
|
38
dnn/x86/nnet_sse2.c
Normal file
38
dnn/x86/nnet_sse2.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifndef __SSE2__
|
||||
#error nnet_sse2.c is being compiled without SSE2 enabled
|
||||
#endif
|
||||
|
||||
#define RTCD_ARCH sse2
|
||||
|
||||
#include "nnet_arch.h"
|
38
dnn/x86/nnet_sse4_1.c
Normal file
38
dnn/x86/nnet_sse4_1.c
Normal file
|
@ -0,0 +1,38 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifndef __SSE4_1__
|
||||
#error nnet_sse4_1.c is being compiled without SSE4.1 enabled
|
||||
#endif
|
||||
|
||||
#define RTCD_ARCH sse4_1
|
||||
|
||||
#include "nnet_arch.h"
|
54
dnn/x86/x86_dnn_map.c
Normal file
54
dnn/x86/x86_dnn_map.c
Normal file
|
@ -0,0 +1,54 @@
|
|||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "x86/x86cpu.h"
|
||||
#include "nnet.h"
|
||||
|
||||
#if defined(OPUS_HAVE_RTCD)
|
||||
|
||||
#if (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_AVX2))
|
||||
|
||||
/* Run-time dispatch table for the linear-layer kernel, one entry per arch
   level. The MAY_HAVE_*() macros are defined elsewhere (presumably
   x86/x86cpu.h); they are assumed to resolve to the suffixed variant when it
   is compiled in and to a lower fallback otherwise -- confirm. */
void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const LinearLayer *linear,
|
||||
float *out,
|
||||
const float *in
|
||||
) = {
|
||||
compute_linear_c, /* non-sse */
|
||||
compute_linear_c, /* presumably the plain-SSE level: C version used -- confirm */
|
||||
MAY_HAVE_SSE2(compute_linear), /* sse2 */
|
||||
MAY_HAVE_SSE4_1(compute_linear), /* sse4.1 */
|
||||
MAY_HAVE_AVX2(compute_linear) /* avx2 */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
|
@ -12,7 +12,9 @@ dnn/vec.h \
|
|||
dnn/vec_avx.h \
|
||||
dnn/vec_neon.h \
|
||||
dnn/pitchdnn.h \
|
||||
dnn/pitchdnn_data.h
|
||||
dnn/pitchdnn_data.h \
|
||||
dnn/x86/dnn_x86.h \
|
||||
dnn/nnet_arch.h
|
||||
|
||||
DRED_HEAD = \
|
||||
silk/dred_coding.h \
|
||||
|
|
|
@ -7,6 +7,7 @@ dnn/lpcnet_enc.c \
|
|||
dnn/lpcnet_plc.c \
|
||||
dnn/lpcnet_tables.c \
|
||||
dnn/nnet.c \
|
||||
dnn/nnet_default.c \
|
||||
dnn/plc_data.c \
|
||||
dnn/parse_lpcnet_weights.c \
|
||||
dnn/pitchdnn.c \
|
||||
|
@ -21,3 +22,8 @@ dnn/dred_rdovae_stats_data.c \
|
|||
silk/dred_encoder.c \
|
||||
silk/dred_coding.c \
|
||||
silk/dred_decoder.c
|
||||
|
||||
DNN_SOURCES_X86_RTCD = dnn/x86/x86_dnn_map.c
|
||||
DNN_SOURCES_AVX2 = dnn/x86/nnet_avx2.c
|
||||
DNN_SOURCES_SSE4_1 = dnn/x86/nnet_sse4_1.c
|
||||
DNN_SOURCES_SSE2 = dnn/x86/nnet_sse2.c
|
||||
|
|
|
@ -87,7 +87,7 @@ void dred_encoder_init(DREDEnc* enc, opus_int32 Fs, int channels)
|
|||
dred_encoder_reset(enc);
|
||||
}
|
||||
|
||||
static void dred_process_frame(DREDEnc *enc)
|
||||
static void dred_process_frame(DREDEnc *enc, int arch)
|
||||
{
|
||||
float feature_buffer[2 * 36];
|
||||
float input_buffer[2*DRED_NUM_FEATURES] = {0};
|
||||
|
@ -97,15 +97,15 @@ static void dred_process_frame(DREDEnc *enc)
|
|||
OPUS_MOVE(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM);
|
||||
|
||||
/* calculate LPCNet features */
|
||||
lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer, feature_buffer);
|
||||
lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, feature_buffer + 36);
|
||||
lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer, feature_buffer, arch);
|
||||
lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, feature_buffer + 36, arch);
|
||||
|
||||
/* prepare input buffer (discard LPC coefficients) */
|
||||
OPUS_COPY(input_buffer, feature_buffer, DRED_NUM_FEATURES);
|
||||
OPUS_COPY(input_buffer + DRED_NUM_FEATURES, feature_buffer + 36, DRED_NUM_FEATURES);
|
||||
|
||||
/* run RDOVAE encoder */
|
||||
dred_rdovae_encode_dframe(&enc->rdovae_enc, &enc->model, enc->latents_buffer, enc->state_buffer, input_buffer);
|
||||
dred_rdovae_encode_dframe(&enc->rdovae_enc, &enc->model, enc->latents_buffer, enc->state_buffer, input_buffer, arch);
|
||||
enc->latents_buffer_fill = IMIN(enc->latents_buffer_fill+1, DRED_NUM_REDUNDANCY_FRAMES);
|
||||
}
|
||||
|
||||
|
@ -188,7 +188,7 @@ static void dred_convert_to_16k(DREDEnc *enc, const float *in, int in_len, float
|
|||
}
|
||||
}
|
||||
|
||||
void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay)
|
||||
void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay, int arch)
|
||||
{
|
||||
int curr_offset16k;
|
||||
int frame_size16k = frame_size * 16000 / enc->Fs;
|
||||
|
@ -206,7 +206,7 @@ void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int ex
|
|||
if (enc->input_buffer_fill >= 2*DRED_FRAME_SIZE)
|
||||
{
|
||||
curr_offset16k += 320;
|
||||
dred_process_frame(enc);
|
||||
dred_process_frame(enc, arch);
|
||||
enc->input_buffer_fill -= 2*DRED_FRAME_SIZE;
|
||||
OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill);
|
||||
/* 15 ms (6*2.5 ms) is the ideal offset for DRED because it corresponds to our vocoder look-ahead. */
|
||||
|
|
|
@ -64,7 +64,7 @@ void dred_encoder_reset(DREDEnc* enc);
|
|||
|
||||
void dred_deinit_encoder(DREDEnc *enc);
|
||||
|
||||
void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay);
|
||||
void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size, int extra_delay, int arch);
|
||||
|
||||
int dred_encode_silk_frame(const DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes);
|
||||
|
||||
|
|
|
@ -1424,7 +1424,7 @@ int opus_dred_process(OpusDREDDecoder *dred_dec, const OpusDRED *src, OpusDRED *
|
|||
OPUS_COPY(dst, src, 1);
|
||||
if (dst->process_stage == 2)
|
||||
return OPUS_OK;
|
||||
DRED_rdovae_decode_all(&dred_dec->model, dst->fec_features, dst->state, dst->latents, dst->nb_latents);
|
||||
DRED_rdovae_decode_all(&dred_dec->model, dst->fec_features, dst->state, dst->latents, dst->nb_latents, dred_dec->arch);
|
||||
dst->process_stage = 2;
|
||||
return OPUS_OK;
|
||||
#else
|
||||
|
|
|
@ -1715,7 +1715,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
#ifdef ENABLE_DRED
|
||||
if ( st->dred_duration > 0 && st->dred_encoder.loaded ) {
|
||||
/* DRED Encoder */
|
||||
dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size, total_buffer );
|
||||
dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size, total_buffer, st->arch );
|
||||
} else {
|
||||
st->dred_encoder.latents_buffer_fill = 0;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue