mirror of
https://github.com/xiph/opus.git
synced 2025-05-15 16:08:30 +00:00
Remove support for LPCNet quantization
This commit is contained in:
parent
bfa01f1a1c
commit
247e6a587c
8 changed files with 15 additions and 972 deletions
|
@ -111,6 +111,8 @@ noinst_PROGRAMS = celt/tests/test_unit_cwrs32 \
|
|||
celt/tests/test_unit_types \
|
||||
opus_compare \
|
||||
opus_demo \
|
||||
lpcnet_demo \
|
||||
dump_data \
|
||||
repacketizer_demo \
|
||||
silk/tests/test_unit_LPC_inv_pred_gain \
|
||||
tests/test_opus_api \
|
||||
|
@ -239,6 +241,12 @@ opus_custom_demo_LDADD = libopus.la $(LIBM)
|
|||
endif
|
||||
endif
|
||||
|
||||
lpcnet_demo_SOURCES = dnn/lpcnet_demo.c
|
||||
lpcnet_demo_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
|
||||
|
||||
dump_data_SOURCES = dnn/dump_data.c
|
||||
dump_data_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
|
||||
|
||||
EXTRA_DIST = opus.pc.in \
|
||||
opus-uninstalled.pc.in \
|
||||
opus.m4 \
|
||||
|
|
|
@ -55,7 +55,7 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const
|
|||
}
|
||||
}
|
||||
|
||||
/* Returns a pseudo-random value in [-0.5, 0.5], drawn from the C library
   rand() stream (so the sequence depends on the srand() seed).
   Declared with (void) so the definition is a real prototype. */
static float uni_rand(void) {
  return rand()/(double)RAND_MAX-.5;
}
|
||||
|
||||
|
@ -135,9 +135,6 @@ int main(int argc, char **argv) {
|
|||
LPCNetEncState *st;
|
||||
float noise_std=0;
|
||||
int training = -1;
|
||||
int encode = 0;
|
||||
int decode = 0;
|
||||
int quantize = 0;
|
||||
int burg = 0;
|
||||
srand(getpid());
|
||||
st = lpcnet_encoder_create();
|
||||
|
@ -151,24 +148,7 @@ int main(int argc, char **argv) {
|
|||
training = 0;
|
||||
}
|
||||
if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
|
||||
if (argc == 5 && strcmp(argv[1], "-qtrain")==0) {
|
||||
training = 1;
|
||||
quantize = 1;
|
||||
}
|
||||
if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
|
||||
if (argc == 4 && strcmp(argv[1], "-qtest")==0) {
|
||||
training = 0;
|
||||
quantize = 1;
|
||||
}
|
||||
if (argc == 4 && strcmp(argv[1], "-encode")==0) {
|
||||
training = 0;
|
||||
quantize = 1;
|
||||
encode = 1;
|
||||
}
|
||||
if (argc == 4 && strcmp(argv[1], "-decode")==0) {
|
||||
training = 0;
|
||||
decode = 1;
|
||||
}
|
||||
if (training == -1) {
|
||||
fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
|
||||
fprintf(stderr, " or %s -test <speech> <features out>\n", argv0);
|
||||
|
@ -184,23 +164,6 @@ int main(int argc, char **argv) {
|
|||
fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
|
||||
exit(1);
|
||||
}
|
||||
if (decode) {
|
||||
float vq_mem[NB_BANDS] = {0};
|
||||
while (1) {
|
||||
int ret;
|
||||
unsigned char buf[8];
|
||||
float features[4][NB_TOTAL_FEATURES];
|
||||
/*int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;*/
|
||||
/*ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);*/
|
||||
ret = fread(buf, 1, 8, f1);
|
||||
if (ret != 8) break;
|
||||
decode_packet(features, vq_mem, buf);
|
||||
for (i=0;i<4;i++) {
|
||||
fwrite(features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (training) {
|
||||
fpcm = fopen(argv[4], "wb");
|
||||
if (fpcm == NULL) {
|
||||
|
@ -279,18 +242,11 @@ int main(int argc, char **argv) {
|
|||
compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std);
|
||||
}
|
||||
|
||||
if (!quantize) {
|
||||
process_single_frame(st, ffeat);
|
||||
if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1);
|
||||
}
|
||||
st->pcount++;
|
||||
/* Running on groups of 4 frames. */
|
||||
if (st->pcount == 4) {
|
||||
if (quantize) {
|
||||
unsigned char buf[8];
|
||||
process_superframe(st, buf, ffeat, encode, quantize);
|
||||
if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4);
|
||||
}
|
||||
st->pcount = 0;
|
||||
}
|
||||
/*if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);*/
|
||||
|
|
40
dnn/lpcnet.c
40
dnn/lpcnet.c
|
@ -279,43 +279,3 @@ void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *o
|
|||
LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N) {
|
||||
lpcnet_synthesize_impl(lpcnet, features, output, N, 0);
|
||||
}
|
||||
|
||||
#ifndef OPUS_BUILD
|
||||
|
||||
LPCNET_EXPORT int lpcnet_decoder_get_size()
|
||||
{
|
||||
return sizeof(LPCNetDecState);
|
||||
}
|
||||
|
||||
/* Resets a caller-provided decoder state: the whole structure is zeroed
   and the embedded synthesis state is then initialised with lpcnet_init().
   Always returns 0. */
LPCNET_EXPORT int lpcnet_decoder_init(LPCNetDecState *st)
{
  /* Same byte count that lpcnet_decoder_get_size() reports. */
  memset(st, 0, sizeof(*st));
  lpcnet_init(&st->lpcnet_state);
  return 0;
}
|
||||
|
||||
/* Allocates a decoder state on the heap and initialises it.
   Returns the new state, or NULL if the allocation failed. The caller
   releases it with lpcnet_decoder_destroy(). */
LPCNET_EXPORT LPCNetDecState *lpcnet_decoder_create(void)
{
  LPCNetDecState *st;
  st = malloc(lpcnet_decoder_get_size());
  /* lpcnet_decoder_init() writes through the pointer, so it must not be
     called when malloc() failed. */
  if (st == NULL) return NULL;
  lpcnet_decoder_init(st);
  return st;
}
|
||||
|
||||
/* Releases a decoder state by passing it to free(). Since free(NULL) is a
   no-op, a NULL state is accepted. */
LPCNET_EXPORT void lpcnet_decoder_destroy(LPCNetDecState *st)
|
||||
{
|
||||
free(st);
|
||||
}
|
||||
|
||||
/* Decodes one compressed packet into PCM.
   buf is handed to decode_packet(), which expands it into four feature
   vectors; each vector is then synthesised into FRAME_SIZE samples, written
   back to back into pcm (4*FRAME_SIZE samples in total).
   Always returns 0. */
LPCNET_EXPORT int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, short *pcm)
{
  float frame_features[4][NB_TOTAL_FEATURES];
  int frame = 0;

  decode_packet(frame_features, st->vq_mem, buf);
  while (frame < 4) {
    lpcnet_synthesize(&st->lpcnet_state, frame_features[frame], pcm + frame*FRAME_SIZE, FRAME_SIZE);
    frame++;
  }
  return 0;
}
|
||||
|
||||
#endif
|
||||
|
|
156
dnn/lpcnet_dec.c
156
dnn/lpcnet_dec.c
|
@ -1,156 +0,0 @@
|
|||
/* Copyright (c) 2017-2019 Mozilla */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "kiss_fft.h"
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include "freq.h"
|
||||
#include "pitch.h"
|
||||
#include "arch.h"
|
||||
#include <assert.h>
|
||||
#include "lpcnet_private.h"
|
||||
#include "lpcnet.h"
|
||||
|
||||
|
||||
/* Read cursor over a byte buffer, consumed bit by bit by bits_unpack(). */
typedef struct {
|
||||
/* Index in chars of the byte currently being read. */
int byte_pos;
|
||||
/* Number of bits already consumed from the current byte; bits are taken
   starting from the most significant one. */
int bit_pos;
|
||||
/* Buffer length in bytes; reading stops when byte_pos reaches it. */
int max_bytes;
|
||||
/* Buffer being read (not modified by the unpacker). */
const unsigned char *chars;
|
||||
} unpacker;
|
||||
|
||||
/* Points the unpacker at buf (size bytes long) and rewinds the cursor to the
   first bit of the first byte. The buffer is referenced, not copied. */
void bits_unpacker_init(unpacker *bits, const unsigned char *buf, int size) {
  /* Attach the buffer... */
  bits->chars = buf;
  bits->max_bytes = size;
  /* ...and start reading from its very first bit. */
  bits->bit_pos = 0;
  bits->byte_pos = 0;
}
|
||||
|
||||
/* Reads the next nb_bits bits from the unpacker and returns them as an
   unsigned value. Bits are taken most-significant-first within each byte,
   and the first bit read ends up as the most significant bit of the result.
   If the buffer runs out before nb_bits bits were read, a message is printed
   to stderr and 0 is returned (the bits read so far are discarded).
   A zero or negative nb_bits reads nothing and returns 0. */
unsigned int bits_unpack(unpacker *bits, int nb_bits) {
  unsigned int d=0;
  /* "> 0" rather than "!= 0": with a negative count the old loop never saw
     zero and drained the whole buffer before bailing out. */
  while (nb_bits > 0)
  {
    /* ">=" also stops a cursor that is somehow already past the end. */
    if (bits->byte_pos >= bits->max_bytes) {
      fprintf(stderr, "something went horribly wrong\n");
      return 0;
    }
    d<<=1;
    d |= (bits->chars[bits->byte_pos]>>(BITS_PER_CHAR-1 - bits->bit_pos))&1;
    bits->bit_pos++;
    /* Move on to the next byte once the current one is used up. */
    if (bits->bit_pos==BITS_PER_CHAR)
    {
      bits->bit_pos=0;
      bits->byte_pos++;
    }
    nb_bits--;
  }
  return d;
}
|
||||
|
||||
#ifndef OPUS_BUILD
|
||||
void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8])
|
||||
{
|
||||
int c0_id;
|
||||
int main_pitch;
|
||||
int modulation;
|
||||
int corr_id;
|
||||
int vq_end[3];
|
||||
int vq_mid;
|
||||
int interp_id;
|
||||
|
||||
int i;
|
||||
int sub;
|
||||
int voiced = 1;
|
||||
float frame_corr;
|
||||
float sign;
|
||||
unpacker bits;
|
||||
|
||||
bits_unpacker_init(&bits, buf, 8);
|
||||
c0_id = bits_unpack(&bits, 7);
|
||||
main_pitch = bits_unpack(&bits, 6);
|
||||
modulation = bits_unpack(&bits, 3);
|
||||
corr_id = bits_unpack(&bits, 2);
|
||||
vq_end[0] = bits_unpack(&bits, 10);
|
||||
vq_end[1] = bits_unpack(&bits, 10);
|
||||
vq_end[2] = bits_unpack(&bits, 10);
|
||||
vq_mid = bits_unpack(&bits, 13);
|
||||
interp_id = bits_unpack(&bits, 3);
|
||||
/*fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);*/
|
||||
|
||||
|
||||
for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
|
||||
|
||||
modulation -= 4;
|
||||
if (modulation==-4) {
|
||||
voiced = 0;
|
||||
modulation = 0;
|
||||
}
|
||||
if (voiced) {
|
||||
frame_corr = 0.3875f + .175f*corr_id;
|
||||
} else {
|
||||
frame_corr = 0.0375f + .075f*corr_id;
|
||||
}
|
||||
for (sub=0;sub<4;sub++) {
|
||||
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
||||
p *= 1.f + modulation/16.f/7.f*(2*sub-3);
|
||||
p = MIN16(255, MAX16(33, p));
|
||||
features[sub][NB_BANDS] = .02f*(p-100.f);
|
||||
features[sub][NB_BANDS + 1] = frame_corr-.5f;
|
||||
}
|
||||
|
||||
features[3][0] = (c0_id-64)/4.f;
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
|
||||
}
|
||||
|
||||
sign = 1;
|
||||
if (vq_mid >= 4096) {
|
||||
vq_mid -= 4096;
|
||||
sign = -1;
|
||||
}
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
features[1][i] = sign*ceps_codebook_diff4[vq_mid*NB_BANDS + i];
|
||||
}
|
||||
if ((vq_mid&MULTI_MASK) < 2) {
|
||||
for (i=0;i<NB_BANDS;i++) features[1][i] += .5f*(vq_mem[i] + features[3][i]);
|
||||
} else if ((vq_mid&MULTI_MASK) == 2) {
|
||||
for (i=0;i<NB_BANDS;i++) features[1][i] += vq_mem[i];
|
||||
} else {
|
||||
for (i=0;i<NB_BANDS;i++) features[1][i] += features[3][i];
|
||||
}
|
||||
|
||||
perform_double_interp(features, vq_mem, interp_id);
|
||||
|
||||
RNN_COPY(vq_mem, &features[3][0], NB_BANDS);
|
||||
}
|
||||
#endif
|
|
@ -77,17 +77,13 @@ void free_blob(unsigned char *blob, int len) {
|
|||
# endif
|
||||
#endif
|
||||
|
||||
#define MODE_ENCODE 0
|
||||
#define MODE_DECODE 1
|
||||
#define MODE_FEATURES 2
|
||||
#define MODE_SYNTHESIS 3
|
||||
#define MODE_PLC 4
|
||||
#define MODE_ADDLPC 5
|
||||
|
||||
void usage(void) {
|
||||
fprintf(stderr, "usage: lpcnet_demo -encode <input.pcm> <compressed.lpcnet>\n");
|
||||
fprintf(stderr, " lpcnet_demo -decode <compressed.lpcnet> <output.pcm>\n");
|
||||
fprintf(stderr, " lpcnet_demo -features <input.pcm> <features.f32>\n");
|
||||
fprintf(stderr, "usage: lpcnet_demo -features <input.pcm> <features.f32>\n");
|
||||
fprintf(stderr, " lpcnet_demo -synthesis <features.f32> <output.pcm>\n");
|
||||
fprintf(stderr, " lpcnet_demo -plc <plc_options> <percent> <input.pcm> <output.pcm>\n");
|
||||
fprintf(stderr, " lpcnet_demo -plc_file <plc_options> <percent> <input.pcm> <output.pcm>\n");
|
||||
|
@ -113,9 +109,7 @@ int main(int argc, char **argv) {
|
|||
const char *filename = "weights_blob.bin";
|
||||
#endif
|
||||
if (argc < 4) usage();
|
||||
if (strcmp(argv[1], "-encode") == 0) mode=MODE_ENCODE;
|
||||
else if (strcmp(argv[1], "-decode") == 0) mode=MODE_DECODE;
|
||||
else if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
|
||||
if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
|
||||
else if (strcmp(argv[1], "-synthesis") == 0) mode=MODE_SYNTHESIS;
|
||||
else if (strcmp(argv[1], "-plc") == 0) {
|
||||
mode=MODE_PLC;
|
||||
|
@ -160,33 +154,7 @@ int main(int argc, char **argv) {
|
|||
#ifdef USE_WEIGHTS_FILE
|
||||
data = load_blob(filename, &len);
|
||||
#endif
|
||||
if (mode == MODE_ENCODE) {
|
||||
LPCNetEncState *net;
|
||||
net = lpcnet_encoder_create();
|
||||
while (1) {
|
||||
unsigned char buf[LPCNET_COMPRESSED_SIZE];
|
||||
short pcm[LPCNET_PACKET_SAMPLES];
|
||||
size_t ret;
|
||||
ret = fread(pcm, sizeof(pcm[0]), LPCNET_PACKET_SAMPLES, fin);
|
||||
if (feof(fin) || ret != LPCNET_PACKET_SAMPLES) break;
|
||||
lpcnet_encode(net, pcm, buf);
|
||||
fwrite(buf, 1, LPCNET_COMPRESSED_SIZE, fout);
|
||||
}
|
||||
lpcnet_encoder_destroy(net);
|
||||
} else if (mode == MODE_DECODE) {
|
||||
LPCNetDecState *net;
|
||||
net = lpcnet_decoder_create();
|
||||
while (1) {
|
||||
unsigned char buf[LPCNET_COMPRESSED_SIZE];
|
||||
short pcm[LPCNET_PACKET_SAMPLES];
|
||||
size_t ret;
|
||||
ret = fread(buf, sizeof(buf[0]), LPCNET_COMPRESSED_SIZE, fin);
|
||||
if (feof(fin) || ret != LPCNET_COMPRESSED_SIZE) break;
|
||||
lpcnet_decode(net, buf, pcm);
|
||||
fwrite(pcm, sizeof(pcm[0]), LPCNET_PACKET_SAMPLES, fout);
|
||||
}
|
||||
lpcnet_decoder_destroy(net);
|
||||
} else if (mode == MODE_FEATURES) {
|
||||
if (mode == MODE_FEATURES) {
|
||||
LPCNetEncState *net;
|
||||
net = lpcnet_encoder_create();
|
||||
while (1) {
|
||||
|
|
685
dnn/lpcnet_enc.c
685
dnn/lpcnet_enc.c
|
@ -28,9 +28,8 @@
|
|||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#ifdef OPUS_BUILD
|
||||
/* FIXME: Use the optimized celt_pitch_xcorr() */
|
||||
#define celt_pitch_xcorr celt_pitch_xcorr_c
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
@ -45,424 +44,6 @@
|
|||
#include "lpcnet_private.h"
|
||||
#include "lpcnet.h"
|
||||
|
||||
#ifndef OPUS_BUILD
|
||||
|
||||
#define SURVIVORS 5
|
||||
|
||||
|
||||
/* Finds the mbest codebook entries closest to x (squared Euclidean distance).
   codebook holds nb_entries vectors of ndim floats stored back to back.
   On return dist[0..mbest-1] holds the distances in increasing order and
   index[0..mbest-1] the matching entry numbers; among equal distances the
   lower entry number comes first.
   Slots that could not be filled (fewer than mbest entries, or distances not
   below 1e15) keep distance 1e15f and index 0.
   Does nothing when mbest <= 0. */
void vq_quantize_mbest(const float *codebook, int nb_entries, const float *x, int ndim, int mbest, float *dist, int *index)
{
  int i, j;
  /* Without this guard dist[mbest-1] below would read before the array. */
  if (mbest <= 0) return;
  for (i=0;i<mbest;i++) {
    dist[i] = 1e15f;
    /* Give unfilled slots a valid entry number instead of garbage. */
    index[i] = 0;
  }

  for (i=0;i<nb_entries;i++)
  {
    const float *cand = &codebook[i*ndim];
    float d=0;
    for (j=0;j<ndim;j++) {
      float delta = x[j]-cand[j];
      d += delta*delta;
    }
    /* Only candidates that beat the current worst survivor get inserted. */
    if (d<dist[mbest-1])
    {
      int pos = 0;
      /* First slot whose distance is strictly larger than d. */
      while (pos < mbest-1 && !(d < dist[pos])) pos++;
      /* Shift the tail down by one, dropping the previous worst. */
      for (j=mbest-1;j>pos;j--) {
        dist[j] = dist[j-1];
        index[j] = index[j-1];
      }
      dist[pos] = d;
      index[pos] = i;
    }
  }
}
|
||||
|
||||
|
||||
/* Nearest-neighbour search: returns the number of the codebook entry with
   the smallest squared Euclidean distance to x (the lowest number on ties).
   codebook holds nb_entries vectors of ndim floats stored back to back.
   When dist_out is not NULL it receives the winning distance. If no entry
   has a distance below 1e15, entry 0 and distance 1e15f are reported. */
int vq_quantize(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out)
{
  const float *row = codebook;
  float best_dist = 1e15f;
  int best_entry = 0;
  int entry;

  for (entry=0; entry<nb_entries; entry++, row += ndim)
  {
    float acc = 0;
    int dim;
    for (dim=0; dim<ndim; dim++) {
      float delta = x[dim]-row[dim];
      acc += delta*delta;
    }
    if (acc < best_dist) {
      best_dist = acc;
      best_entry = entry;
    }
  }
  if (dist_out != NULL) *dist_out = best_dist;
  return best_entry;
}
|
||||
|
||||
int quantize_2stage(float *x)
|
||||
{
|
||||
int i;
|
||||
int id, id2, id3;
|
||||
float ref[NB_BANDS_1];
|
||||
RNN_COPY(ref, x, NB_BANDS_1);
|
||||
id = vq_quantize(ceps_codebook1, 1024, x, NB_BANDS_1, NULL);
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
|
||||
}
|
||||
id2 = vq_quantize(ceps_codebook2, 1024, x, NB_BANDS_1, NULL);
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
x[i] -= ceps_codebook2[id2*NB_BANDS_1 + i];
|
||||
}
|
||||
id3 = vq_quantize(ceps_codebook3, 1024, x, NB_BANDS_1, NULL);
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
x[i] = ceps_codebook1[id*NB_BANDS_1 + i] + ceps_codebook2[id2*NB_BANDS_1 + i] + ceps_codebook3[id3*NB_BANDS_1 + i];
|
||||
}
|
||||
if (0) {
|
||||
float err = 0;
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
err += (x[i]-ref[i])*(x[i]-ref[i]);
|
||||
}
|
||||
printf("%f\n", sqrt(err/NB_BANDS));
|
||||
}
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
|
||||
int quantize_3stage_mbest(float *x, int entry[3])
|
||||
{
|
||||
int i, k;
|
||||
int id, id2, id3;
|
||||
float ref[NB_BANDS_1];
|
||||
int curr_index[SURVIVORS];
|
||||
int index1[SURVIVORS][3];
|
||||
int index2[SURVIVORS][3];
|
||||
int index3[SURVIVORS][3];
|
||||
float curr_dist[SURVIVORS];
|
||||
float glob_dist[SURVIVORS];
|
||||
RNN_COPY(ref, x, NB_BANDS_1);
|
||||
vq_quantize_mbest(ceps_codebook1, 1024, x, NB_BANDS_1, SURVIVORS, curr_dist, curr_index);
|
||||
for (k=0;k<SURVIVORS;k++) {
|
||||
index1[k][0] = curr_index[k];
|
||||
}
|
||||
for (k=0;k<SURVIVORS;k++) {
|
||||
int m;
|
||||
float diff[NB_BANDS_1];
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
diff[i] = x[i] - ceps_codebook1[index1[k][0]*NB_BANDS_1 + i];
|
||||
}
|
||||
vq_quantize_mbest(ceps_codebook2, 1024, diff, NB_BANDS_1, SURVIVORS, curr_dist, curr_index);
|
||||
if (k==0) {
|
||||
for (m=0;m<SURVIVORS;m++) {
|
||||
index2[m][0] = index1[k][0];
|
||||
index2[m][1] = curr_index[m];
|
||||
glob_dist[m] = curr_dist[m];
|
||||
}
|
||||
/*printf("%f ", glob_dist[0]);*/
|
||||
} else if (curr_dist[0] < glob_dist[SURVIVORS-1]) {
|
||||
int pos;
|
||||
m=0;
|
||||
for (pos=0;pos<SURVIVORS;pos++) {
|
||||
if (curr_dist[m] < glob_dist[pos]) {
|
||||
int j;
|
||||
for (j=SURVIVORS-1;j>=pos+1;j--) {
|
||||
glob_dist[j] = glob_dist[j-1];
|
||||
index2[j][0] = index2[j-1][0];
|
||||
index2[j][1] = index2[j-1][1];
|
||||
}
|
||||
glob_dist[pos] = curr_dist[m];
|
||||
index2[pos][0] = index1[k][0];
|
||||
index2[pos][1] = curr_index[m];
|
||||
m++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (k=0;k<SURVIVORS;k++) {
|
||||
int m;
|
||||
float diff[NB_BANDS_1];
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
diff[i] = x[i] - ceps_codebook1[index2[k][0]*NB_BANDS_1 + i] - ceps_codebook2[index2[k][1]*NB_BANDS_1 + i];
|
||||
}
|
||||
vq_quantize_mbest(ceps_codebook3, 1024, diff, NB_BANDS_1, SURVIVORS, curr_dist, curr_index);
|
||||
if (k==0) {
|
||||
for (m=0;m<SURVIVORS;m++) {
|
||||
index3[m][0] = index2[k][0];
|
||||
index3[m][1] = index2[k][1];
|
||||
index3[m][2] = curr_index[m];
|
||||
glob_dist[m] = curr_dist[m];
|
||||
}
|
||||
/*printf("%f ", glob_dist[0]);*/
|
||||
} else if (curr_dist[0] < glob_dist[SURVIVORS-1]) {
|
||||
int pos;
|
||||
m=0;
|
||||
for (pos=0;pos<SURVIVORS;pos++) {
|
||||
if (curr_dist[m] < glob_dist[pos]) {
|
||||
int j;
|
||||
for (j=SURVIVORS-1;j>=pos+1;j--) {
|
||||
glob_dist[j] = glob_dist[j-1];
|
||||
index3[j][0] = index3[j-1][0];
|
||||
index3[j][1] = index3[j-1][1];
|
||||
index3[j][2] = index3[j-1][2];
|
||||
}
|
||||
glob_dist[pos] = curr_dist[m];
|
||||
index3[pos][0] = index2[k][0];
|
||||
index3[pos][1] = index2[k][1];
|
||||
index3[pos][2] = curr_index[m];
|
||||
m++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
entry[0] = id = index3[0][0];
|
||||
entry[1] = id2 = index3[0][1];
|
||||
entry[2] = id3 = index3[0][2];
|
||||
/*printf("%f ", glob_dist[0]);*/
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
|
||||
}
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
x[i] -= ceps_codebook2[id2*NB_BANDS_1 + i];
|
||||
}
|
||||
/*id3 = vq_quantize(ceps_codebook3, 1024, x, NB_BANDS_1, NULL);*/
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
x[i] = ceps_codebook1[id*NB_BANDS_1 + i] + ceps_codebook2[id2*NB_BANDS_1 + i] + ceps_codebook3[id3*NB_BANDS_1 + i];
|
||||
}
|
||||
if (0) {
|
||||
float err = 0;
|
||||
for (i=0;i<NB_BANDS_1;i++) {
|
||||
err += (x[i]-ref[i])*(x[i]-ref[i]);
|
||||
}
|
||||
printf("%f\n", sqrt(err/NB_BANDS));
|
||||
}
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
static int find_nearest_multi(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out, int sign)
|
||||
{
|
||||
int i, j;
|
||||
float min_dist = 1e15f;
|
||||
int nearest = 0;
|
||||
|
||||
for (i=0;i<nb_entries;i++)
|
||||
{
|
||||
int offset;
|
||||
float dist=0;
|
||||
offset = (i&MULTI_MASK)*ndim;
|
||||
for (j=0;j<ndim;j++)
|
||||
dist += (x[offset+j]-codebook[i*ndim+j])*(x[offset+j]-codebook[i*ndim+j]);
|
||||
if (dist<min_dist)
|
||||
{
|
||||
min_dist = dist;
|
||||
nearest = i;
|
||||
}
|
||||
}
|
||||
if (sign) {
|
||||
for (i=0;i<nb_entries;i++)
|
||||
{
|
||||
int offset;
|
||||
float dist=0;
|
||||
offset = (i&MULTI_MASK)*ndim;
|
||||
for (j=0;j<ndim;j++)
|
||||
dist += (x[offset+j]+codebook[i*ndim+j])*(x[offset+j]+codebook[i*ndim+j]);
|
||||
if (dist<min_dist)
|
||||
{
|
||||
min_dist = dist;
|
||||
nearest = i+nb_entries;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dist_out)
|
||||
*dist_out = min_dist;
|
||||
return nearest;
|
||||
}
|
||||
|
||||
|
||||
int quantize_diff(float *x, float *left, float *right, float *codebook, int bits, int sign, int *entry)
|
||||
{
|
||||
int i;
|
||||
int nb_entries;
|
||||
int id;
|
||||
float ref[NB_BANDS];
|
||||
float pred[4*NB_BANDS];
|
||||
float target[4*NB_BANDS];
|
||||
float s = 1;
|
||||
nb_entries = 1<<bits;
|
||||
RNN_COPY(ref, x, NB_BANDS);
|
||||
for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
|
||||
for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
|
||||
for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
|
||||
for (i=0;i<4*NB_BANDS;i++) target[i] = x[i%NB_BANDS] - pred[i];
|
||||
|
||||
id = find_nearest_multi(codebook, nb_entries, target, NB_BANDS, NULL, sign);
|
||||
*entry = id;
|
||||
if (id >= 1<<bits) {
|
||||
s = -1;
|
||||
id -= (1<<bits);
|
||||
}
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
x[i] = pred[(id&MULTI_MASK)*NB_BANDS + i] + s*codebook[id*NB_BANDS + i];
|
||||
}
|
||||
/*printf("%d %f ", id&MULTI_MASK, s);*/
|
||||
if (0) {
|
||||
float err = 0;
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
err += (x[i]-ref[i])*(x[i]-ref[i]);
|
||||
}
|
||||
printf("%f\n", sqrt(err/NB_BANDS));
|
||||
}
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
int interp_search(const float *x, const float *left, const float *right, float *dist_out)
|
||||
{
|
||||
int i, k;
|
||||
float min_dist = 1e15f;
|
||||
int best_pred = 0;
|
||||
float pred[4*NB_BANDS];
|
||||
for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
|
||||
for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
|
||||
for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
|
||||
|
||||
for (k=1;k<4;k++) {
|
||||
float dist = 0;
|
||||
for (i=0;i<NB_BANDS;i++) dist += (x[i] - pred[k*NB_BANDS+i])*(x[i] - pred[k*NB_BANDS+i]);
|
||||
dist_out[k-1] = dist;
|
||||
if (dist < min_dist) {
|
||||
min_dist = dist;
|
||||
best_pred = k;
|
||||
}
|
||||
}
|
||||
return best_pred - 1;
|
||||
}
|
||||
|
||||
|
||||
void interp_diff(float *x, float *left, float *right, float *codebook, int bits, int sign)
|
||||
{
|
||||
int i, k;
|
||||
float min_dist = 1e15f;
|
||||
int best_pred = 0;
|
||||
float ref[NB_BANDS];
|
||||
float pred[4*NB_BANDS];
|
||||
(void)sign;
|
||||
(void)codebook;
|
||||
(void)bits;
|
||||
RNN_COPY(ref, x, NB_BANDS);
|
||||
for (i=0;i<NB_BANDS;i++) pred[i] = pred[NB_BANDS+i] = .5f*(left[i] + right[i]);
|
||||
for (i=0;i<NB_BANDS;i++) pred[2*NB_BANDS+i] = left[i];
|
||||
for (i=0;i<NB_BANDS;i++) pred[3*NB_BANDS+i] = right[i];
|
||||
|
||||
for (k=1;k<4;k++) {
|
||||
float dist = 0;
|
||||
for (i=0;i<NB_BANDS;i++) dist += (x[i] - pred[k*NB_BANDS+i])*(x[i] - pred[k*NB_BANDS+i]);
|
||||
if (dist < min_dist) {
|
||||
min_dist = dist;
|
||||
best_pred = k;
|
||||
}
|
||||
}
|
||||
/*printf("%d ", best_pred);*/
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
x[i] = pred[best_pred*NB_BANDS + i];
|
||||
}
|
||||
if (0) {
|
||||
float err = 0;
|
||||
for (i=0;i<NB_BANDS;i++) {
|
||||
err += (x[i]-ref[i])*(x[i]-ref[i]);
|
||||
}
|
||||
printf("%f\n", sqrt(err/NB_BANDS));
|
||||
}
|
||||
}
|
||||
|
||||
int double_interp_search(float features[4][NB_TOTAL_FEATURES], const float *mem) {
|
||||
int i, j;
|
||||
int best_id=0;
|
||||
float min_dist = 1e15f;
|
||||
float dist[2][3];
|
||||
interp_search(features[0], mem, features[1], dist[0]);
|
||||
interp_search(features[2], features[1], features[3], dist[1]);
|
||||
for (i=0;i<3;i++) {
|
||||
for (j=0;j<3;j++) {
|
||||
float d;
|
||||
int id;
|
||||
id = 3*i + j;
|
||||
d = dist[0][i] + dist[1][j];
|
||||
if (d < min_dist && id != FORBIDDEN_INTERP) {
|
||||
min_dist = d;
|
||||
best_id = id;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*printf("%d %d %f %d %f\n", id0, id1, dist[0][id0] + dist[1][id1], best_id, min_dist);*/
|
||||
return best_id - (best_id >= FORBIDDEN_INTERP);
|
||||
}
|
||||
|
||||
|
||||
void perform_interp_relaxation(float features[4][NB_TOTAL_FEATURES], const float *mem) {
|
||||
int id0, id1;
|
||||
int best_id;
|
||||
int i;
|
||||
float count, count_1;
|
||||
best_id = double_interp_search(features, mem);
|
||||
best_id += (best_id >= FORBIDDEN_INTERP);
|
||||
id0 = best_id / 3;
|
||||
id1 = best_id % 3;
|
||||
count = 1;
|
||||
if (id0 != 1) {
|
||||
float t = (id0==0) ? .5f : 1.f;
|
||||
for (i=0;i<NB_BANDS;i++) features[1][i] += t*features[0][i];
|
||||
count += t;
|
||||
}
|
||||
if (id1 != 2) {
|
||||
float t = (id1==0) ? .5f : 1.f;
|
||||
for (i=0;i<NB_BANDS;i++) features[1][i] += t*features[2][i];
|
||||
count += t;
|
||||
}
|
||||
count_1 = 1.f/count;
|
||||
for (i=0;i<NB_BANDS;i++) features[1][i] *= count_1;
|
||||
}
|
||||
|
||||
/* Write cursor over a byte buffer, filled bit by bit by bits_pack(). */
typedef struct {
|
||||
/* Index in chars of the byte currently being written. */
int byte_pos;
|
||||
/* Number of bits already written to the current byte; bits are placed
   starting from the most significant one. */
int bit_pos;
|
||||
/* Buffer capacity in bytes; writing stops when byte_pos reaches it. */
int max_bytes;
|
||||
/* Destination buffer. */
unsigned char *chars;
|
||||
} packer;
|
||||
|
||||
|
||||
/* Prepares the packer to write into buf (size bytes long): the buffer is
   cleared and the cursor is placed on the first bit of the first byte.
   The buffer is referenced, not copied. */
void bits_packer_init(packer *bits, unsigned char *buf, int size) {
  /* bits_pack() only ORs bits in, so start from an all-zero buffer. */
  RNN_CLEAR(buf, size);
  bits->chars = buf;
  bits->max_bytes = size;
  bits->bit_pos = 0;
  bits->byte_pos = 0;
}
|
||||
|
||||
/* Appends the nb_bits low-order bits of data to the packer, most
   significant of those bits first; within each output byte bits are placed
   from the most significant position down.
   If the buffer fills up before all bits are written, a message is printed
   to stderr and the remaining bits are dropped.
   A zero or negative nb_bits writes nothing.
   NOTE(review): nb_bits is not checked against the width of unsigned int;
   the visible callers pass at most 13. */
void bits_pack(packer *bits, unsigned int data, int nb_bits) {
  /* "> 0" rather than "!= 0": with a negative count the old loop never saw
     zero and kept shifting by negative amounts until the buffer was full. */
  while (nb_bits > 0)
  {
    int bit;
    /* ">=" also stops a cursor that is somehow already past the end. */
    if (bits->byte_pos >= bits->max_bytes) {
      fprintf(stderr, "something went horribly wrong\n");
      return;
    }
    bit = (data>>(nb_bits-1))&1;
    bits->chars[bits->byte_pos] |= bit<<(BITS_PER_CHAR-1-bits->bit_pos);
    bits->bit_pos++;

    if (bits->bit_pos==BITS_PER_CHAR)
    {
      bits->bit_pos=0;
      bits->byte_pos++;
      /* Clear the next byte before bits get ORed into it. */
      if (bits->byte_pos < bits->max_bytes) bits->chars[bits->byte_pos] = 0;
    }
    nb_bits--;
  }
}
|
||||
|
||||
#endif
|
||||
|
||||
LPCNET_EXPORT int lpcnet_encoder_get_size() {
|
||||
return sizeof(LPCNetEncState);
|
||||
|
@ -576,241 +157,6 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Finalises the features of a group of four frames (eight sub-frames).
   Steps, in order:
    - normalise the eight sub-frame weights so they sum to 8;
    - forward pass over the sub-frames tracking, for every lag, the best
      accumulated score and the lag it came from, then a backward pass to
      read out one lag per sub-frame and the weighted mean correlation;
    - weighted linear regression of the lags to get a pitch contour, turned
      into main_pitch / modulation / corr_id codes;
    - write the pitch and correlation features of the four frames (from the
      codes when quantize is set, from the raw lags otherwise);
    - when quantize is set (and OPUS_BUILD is not defined), quantise the
      remaining features;
    - compute LPC coefficients for each frame and update st->vq_mem.
   Output: when encode is set (and OPUS_BUILD is not defined) the codes are
   packed into the 8 bytes of buf and, if ffeat is not NULL, written to it;
   otherwise, if ffeat is not NULL, the four raw feature vectors are written.
   NOTE(review): the visible caller only sets encode together with quantize;
   with encode alone the quantiser codes packed are all zero. */
void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int encode, int quantize) {
  int i;
  int sub;
  int best_i;
  int best[10];
  int pitch_prev[8][PITCH_MAX_PERIOD];
  float best_a=0;
  float best_b=0;
  float w;
  float sx=0, sxx=0, sxy=0, sy=0, sw=0;
  float frame_corr;
  int voiced;
  float frame_weight_sum = 1e-15f;
  float center_pitch;
  int main_pitch;
  int modulation;
  int corr_id = 0;
#ifndef OPUS_BUILD
  int c0_id=0;
  int vq_end[3]={0};
  int vq_mid=0;
  int interp_id=0;
#endif
  /* The 1e-15 starting value keeps the division safe when all weights are zero. */
  for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
  for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
  /* Forward pass. */
  for(sub=0;sub<8;sub++) {
    float max_path_all = -1e15f;
    best_i = 0;
    /* Attenuate a lag whose correlation does not clearly exceed the one
       found around index (PITCH_MAX_PERIOD+i)/2. */
    for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
      float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
      if (st->xc[2+sub][i] < xc_half*1.1f) st->xc[2+sub][i] *= .8f;
    }
    for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
      int j;
      float max_prev;
      /* Default predecessor: the previous global best, at a fixed penalty. */
      max_prev = st->pitch_max_path_all - 6.f;
      pitch_prev[sub][i] = st->best_i;
      /* Otherwise a neighbour within +/-4, penalised by .02*j^2. */
      for (j=IMAX(-4, -i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
        if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
          max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
          pitch_prev[sub][i] = i+j;
        }
      }
      st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i];
      if (st->pitch_max_path[1][i] > max_path_all) {
        max_path_all = st->pitch_max_path[1][i];
        best_i = i;
      }
    }
    /* Renormalize. */
    for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
    RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
    st->pitch_max_path_all = max_path_all;
    st->best_i = best_i;
  }
  best_i = st->best_i;
  frame_corr = 0;
  /* Backward pass. */
  for (sub=7;sub>=0;sub--) {
    best[2+sub] = PITCH_MAX_PERIOD-best_i;
    frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i];
    best_i = pitch_prev[sub][best_i];
  }
  frame_corr /= 8;
  if (quantize && frame_corr < 0) frame_corr = 0;
  /* Weighted sums for the regression (only best[2..9] are defined). */
  for (sub=2;sub<10;sub++) {
    w = st->frame_weight[sub];
    sw += w;
    sx += w*sub;
    sxx += w*sub*sub;
    sxy += w*sub*best[sub];
    sy += w*best[sub];
  }
  voiced = frame_corr >= .3;
  /* Linear regression to figure out the pitch contour. */
  best_a = (sw*sxy - sx*sy)/(sw*sxx - sx*sx);
  if (voiced) {
    float max_a;
    float mean_pitch = sy/sw;
    /* Allow a relative variation of up to 1/4 over 8 sub-frames. */
    max_a = mean_pitch/32;
    best_a = MIN16(max_a, MAX16(-max_a, best_a));
    corr_id = (int)floor((frame_corr-.3f)/.175f);
    /* corr_id is packed on 2 bits: keep it in 0..3 like the other codes,
       otherwise frame_corr >= 1 yields 4, which packs as 0. */
    corr_id = IMAX(0, IMIN(3, corr_id));
    if (quantize) frame_corr = 0.3875f + .175f*corr_id;
  } else {
    best_a = 0;
    corr_id = (int)floor(frame_corr/.075f);
    corr_id = IMAX(0, IMIN(3, corr_id));
    if (quantize) frame_corr = 0.0375f + .075f*corr_id;
  }
  best_b = (sy - best_a*sx)/sw;
  /* Quantizing the pitch as "main" pitch + slope. */
  center_pitch = best_b+5.5f*best_a;
  main_pitch = (int)floor(.5 + 21.*1.442695041*log(center_pitch/PITCH_MIN_PERIOD));
  main_pitch = IMAX(0, IMIN(63, main_pitch));
  modulation = (int)floor(.5 + 16*7*best_a/center_pitch);
  modulation = IMAX(-3, IMIN(3, modulation));
  for (sub=0;sub<4;sub++) {
    if (quantize) {
      /* Rebuild the period from the codes, exactly as decode_packet() does. */
      float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
      p *= 1.f + modulation/16.f/7.f*(2*sub-3);
      p = MIN16(255, MAX16(33, p));
      st->features[sub][NB_BANDS] = .02f*(p-100);
      st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
    } else {
      /* Unquantised: sum of the two sub-frame lags of this frame. */
      st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
      st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
    }
  }
  /* Carry the last two correlation rows over to the next call. */
  RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
  RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
#ifndef OPUS_BUILD
  if (quantize) {
    c0_id = (int)floor(.5 + st->features[3][0]*4);
    c0_id = IMAX(-64, IMIN(63, c0_id));
    st->features[3][0] = c0_id/4.f;
    quantize_3stage_mbest(&st->features[3][1], vq_end);
    quantize_diff(&st->features[1][0], st->vq_mem, &st->features[3][0], ceps_codebook_diff4, 12, 1, &vq_mid);
    interp_id = double_interp_search(st->features, st->vq_mem);
    perform_double_interp(st->features, st->vq_mem, interp_id);
  }
#endif
  for (sub=0;sub<4;sub++) {
    lpc_from_cepstrum(st->lpc, st->features[sub]);
    for (i=0;i<LPC_ORDER;i++) st->features[sub][NB_BANDS+2+i] = st->lpc[i];
  }
  RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
  if (encode) {
#ifndef OPUS_BUILD
    packer bits;
    /* 7+6+3+2+10+10+10+13+3 = 64 bits, i.e. the 8 bytes of buf. */
    bits_packer_init(&bits, buf, 8);
    bits_pack(&bits, c0_id+64, 7);
    bits_pack(&bits, main_pitch, 6);
    /* A modulation code of 0 marks the packet as unvoiced. */
    bits_pack(&bits, voiced ? modulation+4 : 0, 3);
    bits_pack(&bits, corr_id, 2);
    bits_pack(&bits, vq_end[0], 10);
    bits_pack(&bits, vq_end[1], 10);
    bits_pack(&bits, vq_end[2], 10);
    bits_pack(&bits, vq_mid, 13);
    bits_pack(&bits, interp_id, 3);
    if (ffeat) fwrite(buf, 1, 8, ffeat);
#else
    (void)buf;
#endif
  } else if (ffeat) {
    for (i=0;i<4;i++) {
      fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
    }
  }
}
|
||||
|
||||
|
||||
/* Jointly tracks the pitch over the 8 sub-frames whose correlations are held
 * in st->xc[2..9], then fills in the pitch features of the 4 frames.
 *
 * A forward dynamic-programming pass scores every candidate lag of each
 * sub-frame and records the best predecessor lag; a backward pass then follows
 * those links to recover one lag per sub-frame.
 *
 * st    : encoder state; frame_weight, xc, pitch_max_path, pitch_max_path_all,
 *         best_i, features and vq_mem are all updated in place.
 * ffeat : optional output stream. When non-NULL the 4 feature vectors
 *         (NB_TOTAL_FEATURES floats each) are appended to it; the fwrite()
 *         result is not checked.
 */
void process_multi_frame(LPCNetEncState *st, FILE *ffeat) {
  int sub;
  int lag;
  int track;
  int period[8];
  int back_ptr[8][PITCH_MAX_PERIOD];
  float weight_total = 1e-15f;
  float corr_acc = 0;

  /* Rescale the 8 sub-frame weights so that they add up to 8. The tiny seed
     value keeps the division finite when every weight is zero. */
  for (sub=0;sub<8;sub++) weight_total += st->frame_weight[2+sub];
  for (sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/weight_total);

  /* Forward pass. */
  for (sub=0;sub<8;sub++) {
    float top_score = -1e15f;
    int top_lag = 0;

    /* Attenuate a candidate when it does not exceed, by 10%, the largest of
       three correlations read around index (PITCH_MAX_PERIOD+lag)/2. */
    for (lag=0;lag<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;lag++) {
      float h0 = st->xc[2+sub][(PITCH_MAX_PERIOD+lag)/2];
      float h1 = st->xc[2+sub][(PITCH_MAX_PERIOD+lag+2)/2];
      float h2 = st->xc[2+sub][(PITCH_MAX_PERIOD+lag-1)/2];
      float xc_half = MAX16(MAX16(h0, h1), h2);
      if (st->xc[2+sub][lag] < xc_half*1.1) st->xc[2+sub][lag] *= .8f;
    }

    for (lag=0;lag<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;lag++) {
      int prev;
      int prev_lo = IMAX(lag-4, 0);
      int prev_hi = IMIN(lag+4, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD-1);
      /* Default transition: restart from the previous overall best lag at a
         fixed cost of 6. */
      float best_prev = st->pitch_max_path_all - 6.f;
      back_ptr[sub][lag] = st->best_i;
      /* Otherwise come from a lag at most 4 away, paying .02*jump^2. */
      for (prev=prev_lo;prev<=prev_hi;prev++) {
        int jump = prev - lag;
        float cand = st->pitch_max_path[0][prev] - .02f*abs(jump)*abs(jump);
        if (cand > best_prev) {
          best_prev = cand;
          back_ptr[sub][lag] = prev;
        }
      }
      st->pitch_max_path[1][lag] = best_prev + st->frame_weight[2+sub]*st->xc[2+sub][lag];
      if (st->pitch_max_path[1][lag] > top_score) {
        top_score = st->pitch_max_path[1][lag];
        top_lag = lag;
      }
    }

    /* Renormalize so that the best path score of this sub-frame is 0. */
    for (lag=0;lag<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;lag++) st->pitch_max_path[1][lag] -= top_score;

    /* The scores just computed become the "previous" scores of the next
       sub-frame. */
    RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
    st->pitch_max_path_all = top_score;
    st->best_i = top_lag;
  }

  /* Backward pass: follow the stored links from the last sub-frame. */
  track = st->best_i;
  for (sub=7;sub>=0;sub--) {
    period[sub] = PITCH_MAX_PERIOD-track;
    corr_acc += st->frame_weight[2+sub]*st->xc[2+sub][track];
    track = back_ptr[sub][track];
  }
  corr_acc /= 8;

  /* Each frame covers two sub-frames: its pitch feature is derived from the
     clamped sum of both periods, and every frame shares the same correlation
     feature. */
  for (sub=0;sub<4;sub++) {
    int pair_sum = IMAX(66, IMIN(510, period[2*sub]+period[2*sub+1]));
    st->features[sub][NB_BANDS] = .01f*(pair_sum-200);
    st->features[sub][NB_BANDS + 1] = corr_acc-.5f;
  }

  /* Carry the correlations of the last two sub-frames over to slots 0 and 1. */
  RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
  RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);

  /* Remember the first NB_BANDS features of the last frame. */
  RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);

  if (!ffeat) return;
  for (sub=0;sub<4;sub++) {
    fwrite(st->features[sub], sizeof(float), NB_TOTAL_FEATURES, ffeat);
  }
}
|
||||
|
||||
void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
|
||||
int i;
|
||||
int sub;
|
||||
|
@ -879,35 +225,6 @@ void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Analyses 4 consecutive frames of FRAME_SIZE samples taken from pcm and hands
 * the result to process_superframe() with both its encode and quantize flags
 * set and no feature file, so any output goes through buf.
 *
 * st  : encoder state, updated in place (pre-emphasis memory, pcount, features).
 * pcm : 4*FRAME_SIZE input samples.
 * buf : destination buffer forwarded to process_superframe().
 *
 * Always returns 0.
 */
LPCNET_EXPORT int lpcnet_encode(LPCNetEncState *st, const short *pcm, unsigned char *buf) {
  int frame;
  for (frame=0;frame<4;frame++) {
    float x[FRAME_SIZE];
    const short *src = pcm + frame*FRAME_SIZE;
    int n;
    /* Convert the frame to float, then pre-emphasize it in place. */
    for (n=0;n<FRAME_SIZE;n++) x[n] = src[n];
    preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    /* pcount records which of the 4 frames is being analysed. */
    st->pcount = frame;
    compute_frame_features(st, x);
  }
  process_superframe(st, buf, NULL, 1, 1);
  return 0;
}
|
||||
|
||||
/* Analyses 4 consecutive frames of FRAME_SIZE samples taken from pcm and
 * returns the resulting feature vectors without quantizing or packing them
 * (process_superframe() is called with encode and quantize both 0 and with
 * neither a packet buffer nor a feature file).
 *
 * st       : encoder state, updated in place.
 * pcm      : 4*FRAME_SIZE input samples.
 * features : receives a copy of st->features, NB_TOTAL_FEATURES values for
 *            each of the 4 frames.
 *
 * Always returns 0.
 */
LPCNET_EXPORT int lpcnet_compute_features(LPCNetEncState *st, const short *pcm, float features[4][NB_TOTAL_FEATURES]) {
  int frame;
  for (frame=0;frame<4;frame++) {
    float x[FRAME_SIZE];
    const short *src = pcm + frame*FRAME_SIZE;
    int n;
    /* Convert the frame to float, then pre-emphasize it in place. */
    for (n=0;n<FRAME_SIZE;n++) x[n] = src[n];
    preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    /* pcount records which of the 4 frames is being analysed. */
    st->pcount = frame;
    compute_frame_features(st, x);
  }
  process_superframe(st, NULL, NULL, 0, 0);
  /* Hand the caller its own copy of the per-frame features. */
  for (frame=0;frame<4;frame++) {
    RNN_COPY(&features[frame][0], &st->features[frame][0], NB_TOTAL_FEATURES);
  }
  return 0;
}
|
||||
|
||||
static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES]) {
|
||||
preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
|
||||
compute_frame_features(st, x);
|
||||
|
|
|
@ -105,19 +105,10 @@ struct LPCNetPLCState {
|
|||
short queued_samples[FRAME_SIZE];
|
||||
};
|
||||
|
||||
/* Codebook tables, declared here and defined elsewhere. They are only
   declared when OPUS_BUILD is not defined. */
#ifndef OPUS_BUILD
|
||||
/* NOTE(review): ceps_codebook1..3 are presumably the three stage codebooks
   used when quantizing the cepstrum -- confirm against the quantizer code. */
extern float ceps_codebook1[];
|
||||
extern float ceps_codebook2[];
|
||||
extern float ceps_codebook3[];
|
||||
/* Passed to quantize_diff() by process_superframe() when quantizing. */
extern float ceps_codebook_diff4[];
|
||||
#endif
|
||||

||||
/* Filters the N samples of x into y using coefficient coef and the filter
   memory *mem. Callers in the encoder pass the same buffer for x and y.
   NOTE(review): presumably a first-order pre-emphasis filter -- confirm
   against the definition. */
void preemphasis(float *y, float *mem, const float *x, float coef, int N);
|
||||

||||
/* Called by process_superframe() with the features of the 4 frames, the
   st->vq_mem memory and the interpolation id it selected. */
void perform_double_interp(float features[4][NB_TOTAL_FEATURES], const float *mem, int best_id);
|
||||

||||
/* Finishes the analysis of a group of 4 frames. When quantize is non-zero the
   features are quantized; when encode is non-zero the result is packed into
   buf (8 bytes), otherwise the raw features are written to ffeat if it is
   non-NULL. */
void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int encode, int quantize);
|
||||

||||
/* Analyses one frame of input; callers pass FRAME_SIZE pre-emphasized
   samples and set st->pcount beforehand. */
void compute_frame_features(LPCNetEncState *st, const float *in);
|
||||

||||
/* NOTE(review): presumably the inverse of the packing done in
   process_superframe(), turning an 8-byte packet back into 4 feature
   vectors -- confirm against the definition. */
void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]);
|
||||
|
|
|
@ -4,7 +4,6 @@ dnn/common.c \
|
|||
dnn/freq.c \
|
||||
dnn/kiss99.c \
|
||||
dnn/lpcnet.c \
|
||||
dnn/lpcnet_dec.c \
|
||||
dnn/lpcnet_enc.c \
|
||||
dnn/lpcnet_plc.c \
|
||||
dnn/lpcnet_tables.c \
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue