mirror of
https://github.com/xiph/opus.git
synced 2025-06-02 00:27:43 +00:00
Split off decoder code
This commit is contained in:
parent
8dcccc8934
commit
e63292bd56
6 changed files with 168 additions and 122 deletions
|
@ -22,7 +22,7 @@ endif
|
||||||
|
|
||||||
all: dump_data test_lpcnet test_vec
|
all: dump_data test_lpcnet test_vec
|
||||||
|
|
||||||
dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/lpcnet_enc.o src/ceps_codebooks.o
|
dump_data_objs := src/dump_data.o src/freq.o src/kiss_fft.o src/pitch.o src/celt_lpc.o src/lpcnet_dec.o src/lpcnet_enc.o src/ceps_codebooks.o
|
||||||
dump_data_deps := $(dump_data_objs:.o=.d)
|
dump_data_deps := $(dump_data_objs:.o=.d)
|
||||||
dump_data: $(dump_data_objs)
|
dump_data: $(dump_data_objs)
|
||||||
gcc -o $@ $(CFLAGS) $(dump_data_objs) -lm
|
gcc -o $@ $(CFLAGS) $(dump_data_objs) -lm
|
||||||
|
|
118
dnn/dump_data.c
118
dnn/dump_data.c
|
@ -42,118 +42,6 @@
|
||||||
#include "lpcnet.h"
|
#include "lpcnet.h"
|
||||||
#include "lpcnet_private.h"
|
#include "lpcnet_private.h"
|
||||||
|
|
||||||
/* Read cursor over a byte buffer that is consumed one bit at a time. */
typedef struct {
  int byte_pos;               /* index of the byte currently being read */
  int bit_pos;                /* bits already consumed from that byte */
  int max_bytes;              /* buffer size; reading stops when byte_pos reaches it */
  const unsigned char *chars; /* the buffer itself (only ever read) */
} unpacker;
|
|
||||||
|
|
||||||
/* Point `bits` at `buf` (`size` bytes long) and rewind it to the first bit. */
void bits_unpacker_init(unpacker *bits, unsigned char *buf, int size) {
  bits->chars = buf;
  bits->max_bytes = size;
  bits->bit_pos = 0;
  bits->byte_pos = 0;
}
|
|
||||||
|
|
||||||
/* Read the next `nb_bits` bits from `bits`, most significant bit of each byte
   first, and return them as an unsigned value (first bit read ends up highest).
   If the buffer runs out before all bits are read, a message is printed to
   stderr and 0 is returned. */
unsigned int bits_unpack(unpacker *bits, int nb_bits) {
  unsigned int value = 0;
  for (; nb_bits != 0; nb_bits--) {
    unsigned int bit;
    if (bits->byte_pos == bits->max_bytes) {
      fprintf(stderr, "something went horribly wrong\n");
      return 0;
    }
    bit = (bits->chars[bits->byte_pos] >> (BITS_PER_CHAR-1 - bits->bit_pos)) & 1;
    value = (value << 1) | bit;
    /* Step to the next byte once every bit of this one has been used. */
    if (++bits->bit_pos == BITS_PER_CHAR) {
      bits->bit_pos = 0;
      bits->byte_pos++;
    }
  }
  return value;
}
|
|
||||||
|
|
||||||
void decode_packet(FILE *ffeat, float *vq_mem, unsigned char buf[8])
|
|
||||||
{
|
|
||||||
int c0_id;
|
|
||||||
int main_pitch;
|
|
||||||
int modulation;
|
|
||||||
int corr_id;
|
|
||||||
int vq_end[3];
|
|
||||||
int vq_mid;
|
|
||||||
int interp_id;
|
|
||||||
|
|
||||||
int i;
|
|
||||||
int sub;
|
|
||||||
int voiced = 1;
|
|
||||||
float frame_corr;
|
|
||||||
float features[4][NB_TOTAL_FEATURES];
|
|
||||||
unpacker bits;
|
|
||||||
|
|
||||||
bits_unpacker_init(&bits, buf, 8);
|
|
||||||
c0_id = bits_unpack(&bits, 7);
|
|
||||||
main_pitch = bits_unpack(&bits, 6);
|
|
||||||
modulation = bits_unpack(&bits, 3);
|
|
||||||
corr_id = bits_unpack(&bits, 2);
|
|
||||||
vq_end[0] = bits_unpack(&bits, 10);
|
|
||||||
vq_end[1] = bits_unpack(&bits, 10);
|
|
||||||
vq_end[2] = bits_unpack(&bits, 10);
|
|
||||||
vq_mid = bits_unpack(&bits, 13);
|
|
||||||
interp_id = bits_unpack(&bits, 3);
|
|
||||||
//fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
|
|
||||||
|
|
||||||
|
|
||||||
for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
|
|
||||||
|
|
||||||
modulation -= 4;
|
|
||||||
if (modulation==-4) {
|
|
||||||
voiced = 0;
|
|
||||||
modulation = 0;
|
|
||||||
}
|
|
||||||
if (voiced) {
|
|
||||||
frame_corr = 0.3875f + .175f*corr_id;
|
|
||||||
} else {
|
|
||||||
frame_corr = 0.0375f + .075f*corr_id;
|
|
||||||
}
|
|
||||||
for (sub=0;sub<4;sub++) {
|
|
||||||
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
|
||||||
p *= 1 + modulation/16./7.*(2*sub-3);
|
|
||||||
features[sub][2*NB_BANDS] = .02*(p-100);
|
|
||||||
features[sub][2*NB_BANDS + 1] = frame_corr-.5;
|
|
||||||
}
|
|
||||||
|
|
||||||
features[3][0] = (c0_id-64)/4.;
|
|
||||||
for (i=0;i<NB_BANDS_1;i++) {
|
|
||||||
features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
|
|
||||||
}
|
|
||||||
|
|
||||||
float sign = 1;
|
|
||||||
if (vq_mid >= 4096) {
|
|
||||||
vq_mid -= 4096;
|
|
||||||
sign = -1;
|
|
||||||
}
|
|
||||||
for (i=0;i<NB_BANDS;i++) {
|
|
||||||
features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
|
|
||||||
}
|
|
||||||
if ((vq_mid&MULTI_MASK) < 2) {
|
|
||||||
for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
|
|
||||||
} else if ((vq_mid&MULTI_MASK) == 2) {
|
|
||||||
for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
|
|
||||||
} else {
|
|
||||||
for (i=0;i<NB_BANDS;i++) features[1][i] += features[3][i];
|
|
||||||
}
|
|
||||||
|
|
||||||
perform_double_interp(features, vq_mem, interp_id);
|
|
||||||
|
|
||||||
RNN_COPY(vq_mem, &features[3][0], NB_BANDS);
|
|
||||||
for (i=0;i<4;i++) {
|
|
||||||
fwrite(features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
|
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
|
||||||
int i;
|
int i;
|
||||||
|
@ -293,11 +181,15 @@ int main(int argc, char **argv) {
|
||||||
while (1) {
|
while (1) {
|
||||||
int ret;
|
int ret;
|
||||||
unsigned char buf[8];
|
unsigned char buf[8];
|
||||||
|
float features[4][NB_TOTAL_FEATURES];
|
||||||
//int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;
|
//int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;
|
||||||
//ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);
|
//ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);
|
||||||
ret = fread(buf, 1, 8, f1);
|
ret = fread(buf, 1, 8, f1);
|
||||||
if (ret != 8) break;
|
if (ret != 8) break;
|
||||||
decode_packet(ffeat, vq_mem, buf);
|
decode_packet(features, vq_mem, buf);
|
||||||
|
for (i=0;i<4;i++) {
|
||||||
|
fwrite(features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
154
dnn/lpcnet_dec.c
Normal file
154
dnn/lpcnet_dec.c
Normal file
|
@ -0,0 +1,154 @@
|
||||||
|
/* Copyright (c) 2017-2019 Mozilla */
|
||||||
|
/*
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
- Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
- Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||||
|
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "kiss_fft.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include "freq.h"
|
||||||
|
#include "pitch.h"
|
||||||
|
#include "arch.h"
|
||||||
|
#include "celt_lpc.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include "lpcnet_private.h"
|
||||||
|
#include "lpcnet.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Bit-level read position inside a byte buffer. */
typedef struct {
  int byte_pos;               /* byte currently being read */
  int bit_pos;                /* number of bits of that byte already consumed */
  int max_bytes;              /* size of the buffer; reads fail once byte_pos equals it */
  const unsigned char *chars; /* buffer being unpacked (read-only) */
} unpacker;
|
||||||
|
|
||||||
|
/* Attach `bits` to the `size`-byte buffer `buf` and reset the read position
   to bit 0 of byte 0. */
void bits_unpacker_init(unpacker *bits, unsigned char *buf, int size) {
  bits->chars = buf;
  bits->max_bytes = size;
  bits->byte_pos = 0;
  bits->bit_pos = 0;
}
|
||||||
|
|
||||||
|
/* Extract `nb_bits` bits from the stream and return them as one unsigned
   value. Bits are taken from the top of each byte downwards, and earlier bits
   land in the higher positions of the result. Running past the end of the
   buffer prints a message to stderr and yields 0. */
unsigned int bits_unpack(unpacker *bits, int nb_bits) {
  unsigned int acc = 0;
  int remaining;
  for (remaining = nb_bits; remaining; remaining--) {
    int shift;
    if (bits->byte_pos == bits->max_bytes) {
      fprintf(stderr, "something went horribly wrong\n");
      return 0;
    }
    shift = BITS_PER_CHAR-1 - bits->bit_pos;
    acc <<= 1;
    acc |= (bits->chars[bits->byte_pos] >> shift) & 1;
    bits->bit_pos++;
    /* Wrap to the start of the next byte when this one is used up. */
    if (bits->bit_pos == BITS_PER_CHAR) {
      bits->byte_pos++;
      bits->bit_pos = 0;
    }
  }
  return acc;
}
|
||||||
|
|
||||||
|
void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, unsigned char buf[8])
|
||||||
|
{
|
||||||
|
int c0_id;
|
||||||
|
int main_pitch;
|
||||||
|
int modulation;
|
||||||
|
int corr_id;
|
||||||
|
int vq_end[3];
|
||||||
|
int vq_mid;
|
||||||
|
int interp_id;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
int sub;
|
||||||
|
int voiced = 1;
|
||||||
|
float frame_corr;
|
||||||
|
;
|
||||||
|
unpacker bits;
|
||||||
|
|
||||||
|
bits_unpacker_init(&bits, buf, 8);
|
||||||
|
c0_id = bits_unpack(&bits, 7);
|
||||||
|
main_pitch = bits_unpack(&bits, 6);
|
||||||
|
modulation = bits_unpack(&bits, 3);
|
||||||
|
corr_id = bits_unpack(&bits, 2);
|
||||||
|
vq_end[0] = bits_unpack(&bits, 10);
|
||||||
|
vq_end[1] = bits_unpack(&bits, 10);
|
||||||
|
vq_end[2] = bits_unpack(&bits, 10);
|
||||||
|
vq_mid = bits_unpack(&bits, 13);
|
||||||
|
interp_id = bits_unpack(&bits, 3);
|
||||||
|
//fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
|
||||||
|
|
||||||
|
|
||||||
|
for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
|
||||||
|
|
||||||
|
modulation -= 4;
|
||||||
|
if (modulation==-4) {
|
||||||
|
voiced = 0;
|
||||||
|
modulation = 0;
|
||||||
|
}
|
||||||
|
if (voiced) {
|
||||||
|
frame_corr = 0.3875f + .175f*corr_id;
|
||||||
|
} else {
|
||||||
|
frame_corr = 0.0375f + .075f*corr_id;
|
||||||
|
}
|
||||||
|
for (sub=0;sub<4;sub++) {
|
||||||
|
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
||||||
|
p *= 1 + modulation/16./7.*(2*sub-3);
|
||||||
|
features[sub][2*NB_BANDS] = .02*(p-100);
|
||||||
|
features[sub][2*NB_BANDS + 1] = frame_corr-.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
features[3][0] = (c0_id-64)/4.;
|
||||||
|
for (i=0;i<NB_BANDS_1;i++) {
|
||||||
|
features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
|
||||||
|
}
|
||||||
|
|
||||||
|
float sign = 1;
|
||||||
|
if (vq_mid >= 4096) {
|
||||||
|
vq_mid -= 4096;
|
||||||
|
sign = -1;
|
||||||
|
}
|
||||||
|
for (i=0;i<NB_BANDS;i++) {
|
||||||
|
features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
|
||||||
|
}
|
||||||
|
if ((vq_mid&MULTI_MASK) < 2) {
|
||||||
|
for (i=0;i<NB_BANDS;i++) features[1][i] += .5*(vq_mem[i] + features[3][i]);
|
||||||
|
} else if ((vq_mid&MULTI_MASK) == 2) {
|
||||||
|
for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
|
||||||
|
} else {
|
||||||
|
for (i=0;i<NB_BANDS;i++) features[1][i] += features[3][i];
|
||||||
|
}
|
||||||
|
|
||||||
|
perform_double_interp(features, vq_mem, interp_id);
|
||||||
|
|
||||||
|
RNN_COPY(vq_mem, &features[3][0], NB_BANDS);
|
||||||
|
}
|
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright (c) 2017-2018 Mozilla */
|
/* Copyright (c) 2017-2019 Mozilla */
|
||||||
/*
|
/*
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
modification, are permitted provided that the following conditions
|
modification, are permitted provided that the following conditions
|
||||||
|
@ -43,8 +43,6 @@
|
||||||
#include "lpcnet.h"
|
#include "lpcnet.h"
|
||||||
|
|
||||||
|
|
||||||
#define NB_DELTA_CEPS 6
|
|
||||||
|
|
||||||
//#define NB_FEATURES (2*NB_BANDS+3+LPC_ORDER)
|
//#define NB_FEATURES (2*NB_BANDS+3+LPC_ORDER)
|
||||||
|
|
||||||
|
|
||||||
|
@ -318,8 +316,6 @@ int quantize_diff(float *x, float *left, float *right, float *codebook, int bits
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FORBIDDEN_INTERP 7
|
|
||||||
|
|
||||||
int interp_search(const float *x, const float *left, const float *right, float *dist_out)
|
int interp_search(const float *x, const float *left, const float *right, float *dist_out)
|
||||||
{
|
{
|
||||||
int i, k;
|
int i, k;
|
||||||
|
|
|
@ -16,6 +16,9 @@
|
||||||
#define MULTI 4
|
#define MULTI 4
|
||||||
#define MULTI_MASK (MULTI-1)
|
#define MULTI_MASK (MULTI-1)
|
||||||
|
|
||||||
|
#define FORBIDDEN_INTERP 7
|
||||||
|
|
||||||
|
|
||||||
struct LPCNetEncState{
|
struct LPCNetEncState{
|
||||||
float analysis_mem[OVERLAP_SIZE];
|
float analysis_mem[OVERLAP_SIZE];
|
||||||
float mem_preemph;
|
float mem_preemph;
|
||||||
|
@ -51,5 +54,6 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
|
|
||||||
void compute_frame_features(LPCNetEncState *st, const float *in);
|
void compute_frame_features(LPCNetEncState *st, const float *in);
|
||||||
|
|
||||||
|
void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, unsigned char buf[8]);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -103,8 +103,8 @@ del pred
|
||||||
del in_exc
|
del in_exc
|
||||||
|
|
||||||
# dump models to disk as we go
|
# dump models to disk as we go
|
||||||
checkpoint = ModelCheckpoint('lpcnet24fq_384_10_G16_{epoch:02d}.h5')
|
checkpoint = ModelCheckpoint('lpcnet24g_384_10_G16_{epoch:02d}.h5')
|
||||||
|
|
||||||
model.load_weights('lpcnet24f_384_10_G16_31.h5')
|
model.load_weights('lpcnet24c_384_10_G16_120.h5')
|
||||||
model.compile(optimizer=Adam(0.0005, amsgrad=True, decay=5e-5), loss='sparse_categorical_crossentropy')
|
model.compile(optimizer=Adam(0.0001, amsgrad=True), loss='sparse_categorical_crossentropy')
|
||||||
model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))])
|
model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))])
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue