Remove support for LPCNet quantization

This commit is contained in:
Jean-Marc Valin 2023-06-22 00:44:45 -04:00
parent bfa01f1a1c
commit 247e6a587c
No known key found for this signature in database
GPG key ID: 531A52533318F00A
8 changed files with 15 additions and 972 deletions

View file

@ -28,9 +28,8 @@
#include "config.h"
#endif
#ifdef OPUS_BUILD
/* FIXME: Use the optimized celt_pitch_xcorr() */
#define celt_pitch_xcorr celt_pitch_xcorr_c
#endif
#include <stdlib.h>
#include <string.h>
@ -45,424 +44,6 @@
#include "lpcnet_private.h"
#include "lpcnet.h"
#ifndef OPUS_BUILD
/* Number of candidate paths kept at each stage of the M-best search in
   quantize_3stage_mbest(); also the size of its survivor arrays. */
#define SURVIVORS 5
/* M-best vector quantizer search.
 *
 * Scans the nb_entries rows of codebook (ndim floats each, stored row after
 * row) and keeps the mbest rows closest to x in squared Euclidean distance.
 *
 * On return dist[0..mbest-1] holds the retained distances in ascending order
 * and index[0..mbest-1] the matching row numbers. Rows at equal distance keep
 * their codebook order. Slots that no row ever claimed keep the 1e15f
 * sentinel in dist; their index values are unspecified.
 */
void vq_quantize_mbest(const float *codebook, int nb_entries, const float *x, int ndim, int mbest, float *dist, int *index)
{
  int entry;
  int slot;
  int last = mbest - 1;

  /* Every slot starts out "infinitely" far away. */
  for (slot = 0; slot < mbest; slot++) {
    dist[slot] = 1e15f;
  }

  for (entry = 0; entry < nb_entries; entry++) {
    const float *row = codebook + entry*ndim;
    float d = 0;
    int k;

    for (k = 0; k < ndim; k++) {
      d += (x[k]-row[k])*(x[k]-row[k]);
    }

    /* Not strictly better than the current worst survivor: drop it. */
    if (!(d < dist[last])) {
      continue;
    }

    /* Walk up from the worst slot, pushing every strictly worse survivor
       down by one, until the insertion point is reached. */
    slot = last;
    while (slot > 0 && d < dist[slot-1]) {
      dist[slot] = dist[slot-1];
      index[slot] = index[slot-1];
      slot--;
    }
    dist[slot] = d;
    index[slot] = entry;
  }
}
/* Nearest-neighbour vector quantizer search.
 *
 * Returns the number of the codebook row (ndim floats each, nb_entries rows
 * stored one after another) with the smallest squared Euclidean distance to
 * x. The first such row wins on ties. When dist_out is non-NULL it receives
 * that distance. If no row is closer than 1e15f (including an empty
 * codebook) the result is row 0 with a reported distance of 1e15f.
 */
int vq_quantize(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out)
{
  const float *row = codebook;
  float best_dist = 1e15f;
  int best = 0;
  int entry;

  for (entry = 0; entry < nb_entries; entry++, row += ndim) {
    float d = 0;
    int k;

    for (k = 0; k < ndim; k++) {
      d += (x[k]-row[k])*(x[k]-row[k]);
    }
    if (d < best_dist) {
      best_dist = d;
      best = entry;
    }
  }

  if (dist_out != NULL) {
    *dist_out = best_dist;
  }
  return best;
}
/* Greedy multi-stage vector quantization of x[0..NB_BANDS_1-1].
 *
 * Despite the name, three codebooks are searched in sequence
 * (ceps_codebook1/2/3, 1024 entries each): each stage quantizes the residual
 * left by the previous one. x is overwritten with the sum of the three
 * selected code vectors. Only the stage-1 index is returned; the stage-2 and
 * stage-3 indices are not made available to the caller.
 */
int quantize_2stage(float *x)
{
  float ref[NB_BANDS_1];
  const float *c1;
  const float *c2;
  const float *c3;
  int id, id2, id3;
  int i;

  /* Copy of the input, used only by the disabled debug code below. */
  RNN_COPY(ref, x, NB_BANDS_1);

  /* Stage 1, then remove its contribution. */
  id = vq_quantize(ceps_codebook1, 1024, x, NB_BANDS_1, NULL);
  c1 = &ceps_codebook1[id*NB_BANDS_1];
  for (i = 0; i < NB_BANDS_1; i++) {
    x[i] -= c1[i];
  }

  /* Stage 2 on the residual, then remove its contribution too. */
  id2 = vq_quantize(ceps_codebook2, 1024, x, NB_BANDS_1, NULL);
  c2 = &ceps_codebook2[id2*NB_BANDS_1];
  for (i = 0; i < NB_BANDS_1; i++) {
    x[i] -= c2[i];
  }

  /* Stage 3 on what is left. */
  id3 = vq_quantize(ceps_codebook3, 1024, x, NB_BANDS_1, NULL);
  c3 = &ceps_codebook3[id3*NB_BANDS_1];

  /* Replace the residual by the reconstructed (quantized) vector. */
  for (i = 0; i < NB_BANDS_1; i++) {
    x[i] = c1[i] + c2[i] + c3[i];
  }

  /* Disabled debug output: RMS quantization error. */
  if (0) {
    float err = 0;
    for (i = 0; i < NB_BANDS_1; i++) {
      err += (x[i]-ref[i])*(x[i]-ref[i]);
    }
    printf("%f\n", sqrt(err/NB_BANDS));
  }
  return id;
}
/* Three-stage vector quantization of x[0..NB_BANDS_1-1] using an M-best
   search that keeps SURVIVORS candidate paths between stages.

   Stage 1 keeps the SURVIVORS nearest entries of ceps_codebook1. For every
   survivor, the residual is searched in the next codebook and the resulting
   (path, distance) pairs are merged into a global list of SURVIVORS paths.
   The same is repeated for ceps_codebook3.

   On return entry[0..2] holds the codebook indices of the best path (one per
   stage) and x is overwritten with the sum of the three selected code
   vectors. The return value is the stage-1 index (same as entry[0]). */
int quantize_3stage_mbest(float *x, int entry[3])
{
int i, k;
int id, id2, id3;
float ref[NB_BANDS_1];
int curr_index[SURVIVORS];
/* indexN[p][s]: codebook index chosen at stage s by path p after N stages.
   Only column 0 of index1 and columns 0-1 of index2 are ever used. */
int index1[SURVIVORS][3];
int index2[SURVIVORS][3];
int index3[SURVIVORS][3];
float curr_dist[SURVIVORS];
float glob_dist[SURVIVORS];
/* Copy of the input, used only by the disabled debug code at the end. */
RNN_COPY(ref, x, NB_BANDS_1);
/* Stage 1: the SURVIVORS nearest entries of the first codebook. */
vq_quantize_mbest(ceps_codebook1, 1024, x, NB_BANDS_1, SURVIVORS, curr_dist, curr_index);
for (k=0;k<SURVIVORS;k++) {
index1[k][0] = curr_index[k];
}
/* Stage 2: extend each stage-1 survivor with its best stage-2 entries. */
for (k=0;k<SURVIVORS;k++) {
int m;
float diff[NB_BANDS_1];
/* Residual left by stage-1 survivor k. */
for (i=0;i<NB_BANDS_1;i++) {
diff[i] = x[i] - ceps_codebook1[index1[k][0]*NB_BANDS_1 + i];
}
vq_quantize_mbest(ceps_codebook2, 1024, diff, NB_BANDS_1, SURVIVORS, curr_dist, curr_index);
if (k==0) {
/* The first survivor seeds the global list with all of its candidates. */
for (m=0;m<SURVIVORS;m++) {
index2[m][0] = index1[k][0];
index2[m][1] = curr_index[m];
glob_dist[m] = curr_dist[m];
}
/*printf("%f ", glob_dist[0]);*/
} else if (curr_dist[0] < glob_dist[SURVIVORS-1]) {
/* Merge only when this survivor's best candidate beats the current worst
   global path. The single pass below walks the global list once; m (the
   next candidate of this survivor) advances only when a candidate is
   inserted, so at most one insertion happens per global position. */
int pos;
m=0;
for (pos=0;pos<SURVIVORS;pos++) {
if (curr_dist[m] < glob_dist[pos]) {
int j;
/* Shift worse paths down by one, dropping the last. */
for (j=SURVIVORS-1;j>=pos+1;j--) {
glob_dist[j] = glob_dist[j-1];
index2[j][0] = index2[j-1][0];
index2[j][1] = index2[j-1][1];
}
glob_dist[pos] = curr_dist[m];
index2[pos][0] = index1[k][0];
index2[pos][1] = curr_index[m];
m++;
}
}
}
}
/* Stage 3: same procedure, extending each two-stage path with the third
   codebook. */
for (k=0;k<SURVIVORS;k++) {
int m;
float diff[NB_BANDS_1];
/* Residual left by two-stage path k. */
for (i=0;i<NB_BANDS_1;i++) {
diff[i] = x[i] - ceps_codebook1[index2[k][0]*NB_BANDS_1 + i] - ceps_codebook2[index2[k][1]*NB_BANDS_1 + i];
}
vq_quantize_mbest(ceps_codebook3, 1024, diff, NB_BANDS_1, SURVIVORS, curr_dist, curr_index);
if (k==0) {
for (m=0;m<SURVIVORS;m++) {
index3[m][0] = index2[k][0];
index3[m][1] = index2[k][1];
index3[m][2] = curr_index[m];
glob_dist[m] = curr_dist[m];
}
/*printf("%f ", glob_dist[0]);*/
} else if (curr_dist[0] < glob_dist[SURVIVORS-1]) {
int pos;
m=0;
for (pos=0;pos<SURVIVORS;pos++) {
if (curr_dist[m] < glob_dist[pos]) {
int j;
for (j=SURVIVORS-1;j>=pos+1;j--) {
glob_dist[j] = glob_dist[j-1];
index3[j][0] = index3[j-1][0];
index3[j][1] = index3[j-1][1];
index3[j][2] = index3[j-1][2];
}
glob_dist[pos] = curr_dist[m];
index3[pos][0] = index2[k][0];
index3[pos][1] = index2[k][1];
index3[pos][2] = curr_index[m];
m++;
}
}
}
}
/* Path 0 of the final list is the one reported to the caller. */
entry[0] = id = index3[0][0];
entry[1] = id2 = index3[0][1];
entry[2] = id3 = index3[0][2];
/*printf("%f ", glob_dist[0]);*/
/* NOTE(review): the two residual subtractions below do not affect the
   result, because x is fully overwritten by the reconstruction loop that
   follows. They look like leftovers from the commented-out vq_quantize()
   call. */
for (i=0;i<NB_BANDS_1;i++) {
x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
}
for (i=0;i<NB_BANDS_1;i++) {
x[i] -= ceps_codebook2[id2*NB_BANDS_1 + i];
}
/*id3 = vq_quantize(ceps_codebook3, 1024, x, NB_BANDS_1, NULL);*/
/* Replace x by the reconstructed (quantized) vector. */
for (i=0;i<NB_BANDS_1;i++) {
x[i] = ceps_codebook1[id*NB_BANDS_1 + i] + ceps_codebook2[id2*NB_BANDS_1 + i] + ceps_codebook3[id3*NB_BANDS_1 + i];
}
/* Disabled debug output: RMS quantization error. */
if (0) {
float err = 0;
for (i=0;i<NB_BANDS_1;i++) {
err += (x[i]-ref[i])*(x[i]-ref[i]);
}
printf("%f\n", sqrt(err/NB_BANDS));
}
return id;
}
/* Nearest-neighbour search against several targets at once.
 *
 * x holds consecutive target vectors of ndim floats; codebook row i is
 * compared with target number (i & MULTI_MASK). When sign is non-zero each
 * row is also tried negated, and a match on the negated row is reported as
 * row + nb_entries. Returns the winning index (first one wins on ties, with
 * all non-negated rows tried before negated ones) and, if dist_out is
 * non-NULL, stores the squared distance there.
 */
static int find_nearest_multi(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out, int sign)
{
  float best_dist = 1e15f;
  int best = 0;
  int entry;
  int k;

  /* Pass 1: code vectors as stored. */
  for (entry = 0; entry < nb_entries; entry++) {
    const float *row = codebook + entry*ndim;
    const float *tgt = x + (entry&MULTI_MASK)*ndim;
    float d = 0;

    for (k = 0; k < ndim; k++) {
      d += (tgt[k]-row[k])*(tgt[k]-row[k]);
    }
    if (d < best_dist) {
      best_dist = d;
      best = entry;
    }
  }

  /* Pass 2 (optional): negated code vectors. */
  if (sign) {
    for (entry = 0; entry < nb_entries; entry++) {
      const float *row = codebook + entry*ndim;
      const float *tgt = x + (entry&MULTI_MASK)*ndim;
      float d = 0;

      for (k = 0; k < ndim; k++) {
        d += (tgt[k]+row[k])*(tgt[k]+row[k]);
      }
      if (d < best_dist) {
        best_dist = d;
        best = entry + nb_entries;
      }
    }
  }

  if (dist_out != NULL) {
    *dist_out = best_dist;
  }
  return best;
}
/* Quantizes x[0..NB_BANDS-1] as a prediction plus a code vector.
 *
 * Four predictions are built from the neighbours: the average of left and
 * right (stored twice), left alone and right alone. find_nearest_multi()
 * then picks the codebook row (out of 1<<bits, optionally also negated when
 * sign is non-zero) that best matches x minus the prediction associated with
 * that row via MULTI_MASK.
 *
 * *entry receives the raw search result (offset by 1<<bits when the negated
 * row won). x is overwritten with the reconstruction. The return value is
 * the row number with the sign offset removed.
 */
int quantize_diff(float *x, float *left, float *right, float *codebook, int bits, int sign, int *entry)
{
  float ref[NB_BANDS];
  float pred[4*NB_BANDS];
  float target[4*NB_BANDS];
  float polarity = 1;
  const float *base;
  const float *code;
  int nb_entries = 1<<bits;
  int id;
  int i, k;

  /* Copy of the input, used only by the disabled debug code below. */
  RNN_COPY(ref, x, NB_BANDS);

  /* Predictions 0 and 1: average; 2: left; 3: right. */
  for (i = 0; i < NB_BANDS; i++) {
    float mid = .5f*(left[i] + right[i]);
    pred[i] = mid;
    pred[NB_BANDS+i] = mid;
    pred[2*NB_BANDS+i] = left[i];
    pred[3*NB_BANDS+i] = right[i];
  }
  /* One prediction residual per predictor. */
  for (k = 0; k < 4; k++) {
    for (i = 0; i < NB_BANDS; i++) {
      target[k*NB_BANDS+i] = x[i] - pred[k*NB_BANDS+i];
    }
  }

  id = find_nearest_multi(codebook, nb_entries, target, NB_BANDS, NULL, sign);
  *entry = id;
  /* Indices in the upper half denote the negated code vector. */
  if (id >= nb_entries) {
    polarity = -1;
    id -= nb_entries;
  }

  base = pred + (id&MULTI_MASK)*NB_BANDS;
  code = codebook + id*NB_BANDS;
  for (i = 0; i < NB_BANDS; i++) {
    x[i] = base[i] + polarity*code[i];
  }
  /*printf("%d %f ", id&MULTI_MASK, s);*/

  /* Disabled debug output: RMS quantization error. */
  if (0) {
    float err = 0;
    for (i = 0; i < NB_BANDS; i++) {
      err += (x[i]-ref[i])*(x[i]-ref[i]);
    }
    printf("%f\n", sqrt(err/NB_BANDS));
  }
  return id;
}
/* Evaluates three ways of predicting x[0..NB_BANDS-1] from its neighbours.
 *
 * Candidate 0 is the average of left and right, candidate 1 is left,
 * candidate 2 is right. dist_out[0..2] receives the squared error of each
 * candidate. Returns the number of the best candidate (lowest number on
 * ties), or -1 if no candidate has an error below 1e15f.
 */
int interp_search(const float *x, const float *left, const float *right, float *dist_out)
{
  float cand[3][NB_BANDS];
  float best_dist = 1e15f;
  int best = -1;
  int i, k;

  for (i = 0; i < NB_BANDS; i++) {
    cand[0][i] = .5f*(left[i] + right[i]);
    cand[1][i] = left[i];
    cand[2][i] = right[i];
  }

  for (k = 0; k < 3; k++) {
    float d = 0;
    for (i = 0; i < NB_BANDS; i++) {
      d += (x[i] - cand[k][i])*(x[i] - cand[k][i]);
    }
    dist_out[k] = d;
    if (d < best_dist) {
      best_dist = d;
      best = k;
    }
  }
  return best;
}
/* Replaces x[0..NB_BANDS-1] by the neighbour-based prediction closest to it.
 *
 * The candidates are the average of left and right, left alone and right
 * alone; the one with the smallest squared error wins (first wins on ties,
 * and the average is used if no candidate has an error below 1e15f).
 * codebook, bits and sign are accepted but not used.
 */
void interp_diff(float *x, float *left, float *right, float *codebook, int bits, int sign)
{
  float ref[NB_BANDS];
  float cand[3][NB_BANDS];
  float best_dist = 1e15f;
  int best = 0;
  int i, k;

  (void)sign;
  (void)codebook;
  (void)bits;

  /* Copy of the input, used only by the disabled debug code below. */
  RNN_COPY(ref, x, NB_BANDS);

  for (i = 0; i < NB_BANDS; i++) {
    cand[0][i] = .5f*(left[i] + right[i]);
    cand[1][i] = left[i];
    cand[2][i] = right[i];
  }

  for (k = 0; k < 3; k++) {
    float d = 0;
    for (i = 0; i < NB_BANDS; i++) {
      d += (x[i] - cand[k][i])*(x[i] - cand[k][i]);
    }
    if (d < best_dist) {
      best_dist = d;
      best = k;
    }
  }
  /*printf("%d ", best_pred);*/

  for (i = 0; i < NB_BANDS; i++) {
    x[i] = cand[best][i];
  }

  /* Disabled debug output: RMS interpolation error. */
  if (0) {
    float err = 0;
    for (i = 0; i < NB_BANDS; i++) {
      err += (x[i]-ref[i])*(x[i]-ref[i]);
    }
    printf("%f\n", sqrt(err/NB_BANDS));
  }
}
/* Jointly selects the interpolation modes for frames 0 and 2.
 *
 * Frame 0 is predicted from (mem, features[1]) and frame 2 from
 * (features[1], features[3]) using interp_search(). The pair of modes
 * (i for frame 0, j for frame 2) is numbered 3*i + j, and the pair with the
 * smallest summed error is chosen, never selecting FORBIDDEN_INTERP.
 * The returned id is compacted: ids at or above FORBIDDEN_INTERP are
 * shifted down by one.
 */
int double_interp_search(float features[4][NB_TOTAL_FEATURES], const float *mem) {
  float dist[2][3];
  float best_dist = 1e15f;
  int best_id = 0;
  int id;

  interp_search(features[0], mem, features[1], dist[0]);
  interp_search(features[2], features[1], features[3], dist[1]);

  /* id/3 is the mode of frame 0, id%3 the mode of frame 2. */
  for (id = 0; id < 9; id++) {
    float d;
    if (id == FORBIDDEN_INTERP) {
      continue;
    }
    d = dist[0][id/3] + dist[1][id%3];
    if (d < best_dist) {
      best_dist = d;
      best_id = id;
    }
  }
  /*printf("%d %d %f %d %f\n", id0, id1, dist[0][id0] + dist[1][id1], best_id, min_dist);*/

  /* Close the gap left by the forbidden combination. */
  if (best_id >= FORBIDDEN_INTERP) {
    best_id--;
  }
  return best_id;
}
/* Pulls the first NB_BANDS values of features[1] towards the neighbouring
 * frames that will be interpolated from it.
 *
 * The interpolation modes chosen by double_interp_search() decide how much
 * of frame 0 and frame 2 is mixed in (weight .5 or 1, or nothing for the
 * mode that skips the frame); features[1] is then replaced by the weighted
 * mean of itself (weight 1) and those frames.
 */
void perform_interp_relaxation(float features[4][NB_TOTAL_FEATURES], const float *mem) {
  float *mid = features[1];
  float total = 1;
  float scale;
  int choice;
  int first, second;
  int i;

  /* Undo the compaction applied by double_interp_search(). */
  choice = double_interp_search(features, mem);
  if (choice >= FORBIDDEN_INTERP) {
    choice++;
  }
  first = choice / 3;
  second = choice % 3;

  if (first != 1) {
    const float w = (first==0) ? .5f : 1.f;
    for (i = 0; i < NB_BANDS; i++) {
      mid[i] += w*features[0][i];
    }
    total += w;
  }
  if (second != 2) {
    const float w = (second==0) ? .5f : 1.f;
    for (i = 0; i < NB_BANDS; i++) {
      mid[i] += w*features[2][i];
    }
    total += w;
  }

  /* Normalize by the sum of the weights. */
  scale = 1.f/total;
  for (i = 0; i < NB_BANDS; i++) {
    mid[i] *= scale;
  }
}
/* Write cursor used by bits_packer_init()/bits_pack() to append bit fields
   to a caller-provided byte buffer. */
typedef struct {
/* Index in chars of the byte currently being filled. */
int byte_pos;
/* Number of bits already written in the current byte; bits_pack() fills
   each byte starting from its most significant bit. */
int bit_pos;
/* Capacity of chars in bytes; bits_pack() refuses to write past it. */
int max_bytes;
/* Destination buffer (not owned by the packer). */
unsigned char *chars;
} packer;
/* Prepares bits for writing into buf, which must hold at least size bytes.
   The write position is set to the first bit of the first byte and the
   whole buffer is zeroed. */
void bits_packer_init(packer *bits, unsigned char *buf, int size) {
  /* Attach the destination buffer. */
  bits->chars = buf;
  bits->max_bytes = size;

  /* Rewind the cursor. */
  bits->bit_pos = 0;
  bits->byte_pos = 0;

  /* bits_pack() ORs bits in, so the buffer has to start out cleared. */
  RNN_CLEAR(buf, size);
}
/* Appends the nb_bits least significant bits of data to the packed stream,
 * most significant of those bits first.
 *
 * nb_bits must not exceed the width of unsigned int. A count of zero or
 * less writes nothing. If the buffer is already full, a message is printed
 * to stderr and the remaining bits are dropped.
 */
void bits_pack(packer *bits, unsigned int data, int nb_bits) {
  /* "> 0" rather than "!= 0": a negative count would otherwise never reach
     zero and would shift by a negative amount (undefined behaviour). */
  while (nb_bits > 0)
  {
    int bit;
    /* ">=" also covers a cursor that is somehow already past the end. */
    if (bits->byte_pos >= bits->max_bytes) {
      fprintf(stderr, "something went horribly wrong\n");
      return;
    }
    bit = (data>>(nb_bits-1))&1;
    /* Bytes are filled from their most significant bit downwards. */
    bits->chars[bits->byte_pos] |= bit<<(BITS_PER_CHAR-1-bits->bit_pos);
    bits->bit_pos++;
    if (bits->bit_pos==BITS_PER_CHAR)
    {
      /* Current byte is complete: move on and clear the next one. */
      bits->bit_pos=0;
      bits->byte_pos++;
      if (bits->byte_pos < bits->max_bytes) bits->chars[bits->byte_pos] = 0;
    }
    nb_bits--;
  }
}
#endif
LPCNET_EXPORT int lpcnet_encoder_get_size() {
return sizeof(LPCNetEncState);
@ -576,241 +157,6 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
}
}
/* Finishes the analysis of a group of four frames (eight pitch sub-frames).

   Tracks the pitch across the eight sub-frames with a dynamic-programming
   search over st->xc, fits a straight line to the resulting pitch contour,
   and fills the pitch, correlation and LPC entries of st->features[0..3].

   st       encoder state; xc, frame_weight, pitch_max_path, best_i, features
            and vq_mem are read and updated.
   buf      destination of the 8 packed bytes when encode is non-zero
            (unused when built with OPUS_BUILD).
   ffeat    optional output file: receives the 8 packed bytes when encoding,
            otherwise the four raw feature vectors.
   encode   non-zero to pack the quantized parameters into buf.
   quantize non-zero to replace the features by their quantized values. */
void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int encode, int quantize) {
int i;
int sub;
int best_i;
int best[10];
int pitch_prev[8][PITCH_MAX_PERIOD];
float best_a=0;
float best_b=0;
float w;
float sx=0, sxx=0, sxy=0, sy=0, sw=0;
float frame_corr;
int voiced;
float frame_weight_sum = 1e-15f;
float center_pitch;
int main_pitch;
int modulation;
int corr_id = 0;
#ifndef OPUS_BUILD
int c0_id=0;
int vq_end[3]={0};
int vq_mid=0;
int interp_id=0;
#endif
/* Rescale the eight sub-frame weights so that they sum to 8 (the tiny
   initial value of frame_weight_sum avoids a division by zero). */
for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
/* Forward pass of the dynamic-programming pitch search. */
for(sub=0;sub<8;sub++) {
float max_path_all = -1e15f;
best_i = 0;
/* Index i corresponds to period PITCH_MAX_PERIOD-i (see the backward pass),
   so (PITCH_MAX_PERIOD+i)/2 is about the index of half that period. A
   candidate that is not at least 1.1 times stronger than the values around
   its half-period index is attenuated by 0.8 (presumably to discourage
   picking a multiple of the true period). */
for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
if (st->xc[2+sub][i] < xc_half*1.1f) st->xc[2+sub][i] *= .8f;
}
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
int j;
float max_prev;
/* Default predecessor: jump from the previous overall best, at a cost of 6. */
max_prev = st->pitch_max_path_all - 6.f;
pitch_prev[sub][i] = st->best_i;
/* Otherwise come from a neighbour at most 4 indices away, with a penalty
   of .02 times the squared distance. */
for (j=IMAX(-4, -i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
pitch_prev[sub][i] = i+j;
}
}
st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i];
if (st->pitch_max_path[1][i] > max_path_all) {
max_path_all = st->pitch_max_path[1][i];
best_i = i;
}
}
/* Renormalize. */
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
/*for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
printf("\n");*/
/* The scores just computed become the "previous" scores of the next
   sub-frame. */
RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
st->pitch_max_path_all = max_path_all;
st->best_i = best_i;
}
best_i = st->best_i;
frame_corr = 0;
/* Backward pass. */
for (sub=7;sub>=0;sub--) {
best[2+sub] = PITCH_MAX_PERIOD-best_i;
frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i];
best_i = pitch_prev[sub][best_i];
}
/* Weighted mean correlation along the chosen path. */
frame_corr /= 8;
if (quantize && frame_corr < 0) frame_corr = 0;
/* Empty loop: only holds disabled debug output. */
for (sub=0;sub<8;sub++) {
/*printf("%d %f\n", best[2+sub], frame_corr);*/
}
/*printf("\n");*/
/* Weighted sums for the least-squares fit of best[sub] against sub. */
for (sub=2;sub<10;sub++) {
w = st->frame_weight[sub];
sw += w;
sx += w*sub;
sxx += w*sub*sub;
sxy += w*sub*best[sub];
sy += w*best[sub];
}
voiced = frame_corr >= .3;
/* Linear regression to figure out the pitch contour. */
/* NOTE(review): the denominator is not guarded against zero - confirm that
   the weights can never make it vanish. */
best_a = (sw*sxy - sx*sy)/(sw*sxx - sx*sx);
if (voiced) {
float max_a;
float mean_pitch = sy/sw;
/* Allow a relative variation of up to 1/4 over 8 sub-frames. */
max_a = mean_pitch/32;
best_a = MIN16(max_a, MAX16(-max_a, best_a));
/* Correlation quantized in steps of .175 starting at .3; the quantized
   value is the centre of the step. */
/* NOTE(review): corr_id is later packed into 2 bits, which only fits if
   frame_corr stays below 1.0 - confirm the range of st->xc. */
corr_id = (int)floor((frame_corr-.3f)/.175f);
if (quantize) frame_corr = 0.3875f + .175f*corr_id;
} else {
/* Unvoiced: flat contour, correlation quantized in steps of .075. */
/* NOTE(review): frame_corr is only clamped to >= 0 when quantize is set,
   so corr_id may be negative here otherwise. */
best_a = 0;
corr_id = (int)floor(frame_corr/.075f);
if (quantize) frame_corr = 0.0375f + .075f*corr_id;
}
/*best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);*/
/* Intercept recomputed for the (possibly clamped) slope. */
best_b = (sy - best_a*sx)/sw;
/* Quantizing the pitch as "main" pitch + slope. */
/* The line is evaluated at sub=5.5, the middle of the range 2..9. */
center_pitch = best_b+5.5f*best_a;
/* 1.442695041 is 1/ln(2): main_pitch is 21*log2(center_pitch/PITCH_MIN_PERIOD)
   rounded to the nearest integer, then limited to 6 bits. */
main_pitch = (int)floor(.5 + 21.*1.442695041*log(center_pitch/PITCH_MIN_PERIOD));
main_pitch = IMAX(0, IMIN(63, main_pitch));
/* Relative slope in units of 1/(16*7), limited to -3..3. */
modulation = (int)floor(.5 + 16*7*best_a/center_pitch);
modulation = IMAX(-3, IMIN(3, modulation));
/*printf("%d %d\n", main_pitch, modulation);*/
/*printf("%f %f\n", best_a/center_pitch, best_corr);*/
/*for (sub=2;sub<10;sub++) printf("%f %d %f\n", best_b + sub*best_a, best[sub], best_corr);*/
/* Write the pitch and correlation features of the four frames. */
for (sub=0;sub<4;sub++) {
if (quantize) {
/* Rebuild the period from the quantized main pitch and modulation, then
   limit it to 33..255. */
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
p *= 1.f + modulation/16.f/7.f*(2*sub-3);
p = MIN16(255, MAX16(33, p));
st->features[sub][NB_BANDS] = .02f*(p-100);
st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
} else {
/* Unquantized: use the sum of the two sub-frame periods of this frame,
   limited to 66..510. */
st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
}
/*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/
}
/*printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);*/
/* Rows 8 and 9 of xc become rows 0 and 1 for the next call. */
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
#ifndef OPUS_BUILD
if (quantize) {
/*printf("%f\n", st->features[3][0]);*/
/* First coefficient of frame 3: scalar quantizer with step 1/4, 7 bits. */
c0_id = (int)floor(.5 + st->features[3][0]*4);
c0_id = IMAX(-64, IMIN(63, c0_id));
st->features[3][0] = c0_id/4.f;
/* Remaining coefficients of frame 3: three-stage vector quantizer. */
quantize_3stage_mbest(&st->features[3][1], vq_end);
/*perform_interp_relaxation(st->features, st->vq_mem);*/
/* Frame 1 is coded relative to the previous superframe (vq_mem) and
   frame 3. */
quantize_diff(&st->features[1][0], st->vq_mem, &st->features[3][0], ceps_codebook_diff4, 12, 1, &vq_mid);
/* Frames 0 and 2 are replaced by interpolation; only the mode is sent. */
interp_id = double_interp_search(st->features, st->vq_mem);
perform_double_interp(st->features, st->vq_mem, interp_id);
}
#endif
/* Derive the LPC coefficients of each frame from its features and store
   them after the pitch and correlation entries. */
for (sub=0;sub<4;sub++) {
lpc_from_cepstrum(st->lpc, st->features[sub]);
for (i=0;i<LPC_ORDER;i++) st->features[sub][NB_BANDS+2+i] = st->lpc[i];
}
/*printf("\n");*/
/* Remember the last frame as the "left" reference of the next superframe. */
RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
if (encode) {
#ifndef OPUS_BUILD
packer bits;
/*fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id+64, main_pitch, voiced ? modulation+4 : 0, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);*/
/* 7+6+3+2+10+10+10+13+3 = 64 bits, i.e. exactly the 8 bytes of buf. */
bits_packer_init(&bits, buf, 8);
bits_pack(&bits, c0_id+64, 7);
bits_pack(&bits, main_pitch, 6);
bits_pack(&bits, voiced ? modulation+4 : 0, 3);
bits_pack(&bits, corr_id, 2);
bits_pack(&bits, vq_end[0], 10);
bits_pack(&bits, vq_end[1], 10);
bits_pack(&bits, vq_end[2], 10);
bits_pack(&bits, vq_mid, 13);
bits_pack(&bits, interp_id, 3);
if (ffeat) fwrite(buf, 1, 8, ffeat);
#else
(void)buf;
#endif
} else if (ffeat) {
/* Not encoding: dump the four feature vectors as raw floats. */
for (i=0;i<4;i++) {
fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
}
}
}
/* Finishes the analysis of a group of four frames (eight pitch sub-frames)
   without any quantization.

   Tracks the pitch across the eight sub-frames with a dynamic-programming
   search over st->xc and writes the pitch and correlation entries of
   st->features[0..3]. If ffeat is non-NULL the four feature vectors are
   written to it as raw floats. */
void process_multi_frame(LPCNetEncState *st, FILE *ffeat) {
int i;
int sub;
int best_i;
int best[10];
int pitch_prev[8][PITCH_MAX_PERIOD];
float frame_corr;
float frame_weight_sum = 1e-15f;
/* Rescale the eight sub-frame weights so that they sum to 8 (the tiny
   initial value of frame_weight_sum avoids a division by zero). */
for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub];
for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum);
/* Forward pass of the dynamic-programming pitch search. */
for(sub=0;sub<8;sub++) {
float max_path_all = -1e15f;
best_i = 0;
/* Index i corresponds to period PITCH_MAX_PERIOD-i (see the backward pass),
   so (PITCH_MAX_PERIOD+i)/2 is about the index of half that period. A
   candidate that is not at least 1.1 times stronger than the values around
   its half-period index is attenuated by 0.8. */
/* NOTE(review): the threshold below is the double constant 1.1, whereas
   process_superframe() uses 1.1f for the same test; the comparison is
   therefore carried out in a different precision. */
for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) {
float xc_half = MAX16(MAX16(st->xc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]);
if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8f;
}
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
int j;
float max_prev;
/* Default predecessor: jump from the previous overall best, at a cost of 6. */
max_prev = st->pitch_max_path_all - 6.f;
pitch_prev[sub][i] = st->best_i;
/* Otherwise come from a neighbour at most 4 indices away, with a penalty
   of .02 times the squared distance. */
for (j=IMAX(-4, -i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) {
if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) {
max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j);
pitch_prev[sub][i] = i+j;
}
}
st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i];
if (st->pitch_max_path[1][i] > max_path_all) {
max_path_all = st->pitch_max_path[1][i];
best_i = i;
}
}
/* Renormalize. */
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
/*for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
printf("\n");*/
/* The scores just computed become the "previous" scores of the next
   sub-frame. */
RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
st->pitch_max_path_all = max_path_all;
st->best_i = best_i;
}
best_i = st->best_i;
frame_corr = 0;
/* Backward pass. */
for (sub=7;sub>=0;sub--) {
best[2+sub] = PITCH_MAX_PERIOD-best_i;
frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i];
best_i = pitch_prev[sub][best_i];
}
/* Weighted mean correlation along the chosen path. */
frame_corr /= 8;
/* Pitch feature of each frame: sum of its two sub-frame periods, limited
   to 66..510, then offset and scaled. */
for (sub=0;sub<4;sub++) {
st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
st->features[sub][NB_BANDS + 1] = frame_corr-.5f;
/*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/
}
/*printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);*/
/* Rows 8 and 9 of xc become rows 0 and 1 for the next call. */
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
/*printf("\n");*/
/* Keep the first NB_BANDS values of the last frame in vq_mem. */
RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
if (ffeat) {
for (i=0;i<4;i++) {
fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat);
}
}
}
void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
int i;
int sub;
@ -879,35 +225,6 @@ void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
}
}
/* Encodes four consecutive frames of FRAME_SIZE samples from pcm.
 *
 * Each frame is converted to float, pre-emphasized and analysed; the group
 * is then quantized and packed into buf by process_superframe().
 * Always returns 0.
 */
LPCNET_EXPORT int lpcnet_encode(LPCNetEncState *st, const short *pcm, unsigned char *buf) {
  int frame;

  for (frame = 0; frame < 4; frame++) {
    float x[FRAME_SIZE];
    const short *src = pcm + frame*FRAME_SIZE;
    int n;

    for (n = 0; n < FRAME_SIZE; n++) {
      x[n] = src[n];
    }
    preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    /* Position of this frame within the group of four. */
    st->pcount = frame;
    compute_frame_features(st, x);
  }

  /* encode=1, quantize=1, no feature file. */
  process_superframe(st, buf, NULL, 1, 1);
  return 0;
}
/* Computes the unquantized features of four consecutive frames of
 * FRAME_SIZE samples from pcm.
 *
 * Each frame is converted to float, pre-emphasized and analysed; the group
 * is then processed without encoding or quantization and the resulting
 * feature vectors are copied to features[0..3]. Always returns 0.
 */
LPCNET_EXPORT int lpcnet_compute_features(LPCNetEncState *st, const short *pcm, float features[4][NB_TOTAL_FEATURES]) {
  int frame;

  for (frame = 0; frame < 4; frame++) {
    float x[FRAME_SIZE];
    const short *src = pcm + frame*FRAME_SIZE;
    int n;

    for (n = 0; n < FRAME_SIZE; n++) {
      x[n] = src[n];
    }
    preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    /* Position of this frame within the group of four. */
    st->pcount = frame;
    compute_frame_features(st, x);
  }

  /* No output buffer, no feature file, encode=0, quantize=0. */
  process_superframe(st, NULL, NULL, 0, 0);

  for (frame = 0; frame < 4; frame++) {
    RNN_COPY(&features[frame][0], &st->features[frame][0], NB_TOTAL_FEATURES);
  }
  return 0;
}
static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES]) {
preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
compute_frame_features(st, x);