mirror of
https://github.com/xiph/opus.git
synced 2025-05-19 18:08:29 +00:00
Removing the unused features
Down to 20 features
This commit is contained in:
parent
b90729b83b
commit
6585843237
10 changed files with 23 additions and 30 deletions
|
@ -83,7 +83,7 @@ void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *f
|
||||||
float p=0;
|
float p=0;
|
||||||
float e;
|
float e;
|
||||||
int j;
|
int j;
|
||||||
for (j=0;j<LPC_ORDER;j++) p -= st->features[k][2*NB_BANDS+3+j]*st->sig_mem[j];
|
for (j=0;j<LPC_ORDER;j++) p -= st->features[k][NB_BANDS+2+j]*st->sig_mem[j];
|
||||||
e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p);
|
e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p);
|
||||||
/* Signal. */
|
/* Signal. */
|
||||||
data[4*i] = lin2ulaw(st->sig_mem[0]);
|
data[4*i] = lin2ulaw(st->sig_mem[0]);
|
||||||
|
|
|
@ -42,8 +42,8 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define NB_FEATURES 38
|
#define NB_FEATURES 20
|
||||||
#define NB_TOTAL_FEATURES 55
|
#define NB_TOTAL_FEATURES 36
|
||||||
|
|
||||||
/** Number of bytes in a compressed packet. */
|
/** Number of bytes in a compressed packet. */
|
||||||
#define LPCNET_COMPRESSED_SIZE 8
|
#define LPCNET_COMPRESSED_SIZE 8
|
||||||
|
|
|
@ -139,7 +139,7 @@ LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features,
|
||||||
float gru_b_condition[3*GRU_B_STATE_SIZE];
|
float gru_b_condition[3*GRU_B_STATE_SIZE];
|
||||||
int pitch;
|
int pitch;
|
||||||
/* Matches the Python code -- the 0.1 avoids rounding issues. */
|
/* Matches the Python code -- the 0.1 avoids rounding issues. */
|
||||||
pitch = (int)floor(.1 + 50*features[36]+100);
|
pitch = (int)floor(.1 + 50*features[18]+100);
|
||||||
pitch = IMIN(255, IMAX(33, pitch));
|
pitch = IMIN(255, IMAX(33, pitch));
|
||||||
memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
|
memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0]));
|
||||||
lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE];
|
lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE];
|
||||||
|
|
|
@ -124,8 +124,8 @@ void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const un
|
||||||
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
||||||
p *= 1 + modulation/16./7.*(2*sub-3);
|
p *= 1 + modulation/16./7.*(2*sub-3);
|
||||||
p = MIN16(255, MAX16(33, p));
|
p = MIN16(255, MAX16(33, p));
|
||||||
features[sub][2*NB_BANDS] = .02*(p-100);
|
features[sub][NB_BANDS] = .02*(p-100);
|
||||||
features[sub][2*NB_BANDS + 1] = frame_corr-.5;
|
features[sub][NB_BANDS + 1] = frame_corr-.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
features[3][0] = (c0_id-64)/4.;
|
features[3][0] = (c0_id-64)/4.;
|
||||||
|
|
|
@ -115,7 +115,6 @@ int main(int argc, char **argv) {
|
||||||
fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
|
fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
|
||||||
if (feof(fin)) break;
|
if (feof(fin)) break;
|
||||||
RNN_COPY(features, in_features, NB_FEATURES);
|
RNN_COPY(features, in_features, NB_FEATURES);
|
||||||
RNN_CLEAR(&features[18], 18);
|
|
||||||
lpcnet_synthesize(net, features, pcm, LPCNET_FRAME_SIZE);
|
lpcnet_synthesize(net, features, pcm, LPCNET_FRAME_SIZE);
|
||||||
fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);
|
fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,7 +43,7 @@
|
||||||
#include "lpcnet.h"
|
#include "lpcnet.h"
|
||||||
|
|
||||||
|
|
||||||
//#define NB_FEATURES (2*NB_BANDS+3+LPC_ORDER)
|
//#define NB_FEATURES (NB_BANDS+2+LPC_ORDER)
|
||||||
|
|
||||||
|
|
||||||
#define SURVIVORS 5
|
#define SURVIVORS 5
|
||||||
|
@ -499,7 +499,6 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
|
||||||
float E = 0;
|
float E = 0;
|
||||||
float Ly[NB_BANDS];
|
float Ly[NB_BANDS];
|
||||||
float follow, logMax;
|
float follow, logMax;
|
||||||
float g;
|
|
||||||
kiss_fft_cpx X[FREQ_SIZE];
|
kiss_fft_cpx X[FREQ_SIZE];
|
||||||
float Ex[NB_BANDS];
|
float Ex[NB_BANDS];
|
||||||
float xcorr[PITCH_MAX_PERIOD];
|
float xcorr[PITCH_MAX_PERIOD];
|
||||||
|
@ -519,9 +518,8 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
|
||||||
}
|
}
|
||||||
dct(st->features[st->pcount], Ly);
|
dct(st->features[st->pcount], Ly);
|
||||||
st->features[st->pcount][0] -= 4;
|
st->features[st->pcount][0] -= 4;
|
||||||
g = lpc_from_cepstrum(st->lpc, st->features[st->pcount]);
|
lpc_from_cepstrum(st->lpc, st->features[st->pcount]);
|
||||||
st->features[st->pcount][2*NB_BANDS+2] = log10(g);
|
for (i=0;i<LPC_ORDER;i++) st->features[st->pcount][NB_BANDS+2+i] = st->lpc[i];
|
||||||
for (i=0;i<LPC_ORDER;i++) st->features[st->pcount][2*NB_BANDS+3+i] = st->lpc[i];
|
|
||||||
RNN_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
|
RNN_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
|
||||||
RNN_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
|
RNN_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
|
||||||
for (i=0;i<FRAME_SIZE;i++) {
|
for (i=0;i<FRAME_SIZE;i++) {
|
||||||
|
@ -663,13 +661,13 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
||||||
p *= 1 + modulation/16./7.*(2*sub-3);
|
p *= 1 + modulation/16./7.*(2*sub-3);
|
||||||
p = MIN16(255, MAX16(33, p));
|
p = MIN16(255, MAX16(33, p));
|
||||||
st->features[sub][2*NB_BANDS] = .02*(p-100);
|
st->features[sub][NB_BANDS] = .02*(p-100);
|
||||||
st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;
|
st->features[sub][NB_BANDS + 1] = frame_corr-.5;
|
||||||
} else {
|
} else {
|
||||||
st->features[sub][2*NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
|
st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
|
||||||
st->features[sub][2*NB_BANDS + 1] = frame_corr-.5;
|
st->features[sub][NB_BANDS + 1] = frame_corr-.5;
|
||||||
}
|
}
|
||||||
//printf("%f %d %f\n", st->features[sub][2*NB_BANDS], best[2+2*sub], frame_corr);
|
//printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);
|
||||||
}
|
}
|
||||||
//printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);
|
//printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);
|
||||||
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
|
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
|
||||||
|
@ -686,9 +684,8 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
perform_double_interp(st->features, st->vq_mem, interp_id);
|
perform_double_interp(st->features, st->vq_mem, interp_id);
|
||||||
}
|
}
|
||||||
for (sub=0;sub<4;sub++) {
|
for (sub=0;sub<4;sub++) {
|
||||||
float g = lpc_from_cepstrum(st->lpc, st->features[sub]);
|
lpc_from_cepstrum(st->lpc, st->features[sub]);
|
||||||
st->features[sub][2*NB_BANDS+2] = log10(g);
|
for (i=0;i<LPC_ORDER;i++) st->features[sub][NB_BANDS+2+i] = st->lpc[i];
|
||||||
for (i=0;i<LPC_ORDER;i++) st->features[sub][2*NB_BANDS+3+i] = st->lpc[i];
|
|
||||||
}
|
}
|
||||||
//printf("\n");
|
//printf("\n");
|
||||||
RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
|
RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
|
||||||
|
|
|
@ -59,7 +59,6 @@ int main(int argc, char **argv) {
|
||||||
fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
|
fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
|
||||||
if (feof(fin)) break;
|
if (feof(fin)) break;
|
||||||
RNN_COPY(features, in_features, NB_FEATURES);
|
RNN_COPY(features, in_features, NB_FEATURES);
|
||||||
RNN_CLEAR(&features[18], 18);
|
|
||||||
lpcnet_synthesize(net, features, pcm, FRAME_SIZE);
|
lpcnet_synthesize(net, features, pcm, FRAME_SIZE);
|
||||||
fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);
|
fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout);
|
||||||
}
|
}
|
||||||
|
|
|
@ -212,7 +212,7 @@ class WeightClip(Constraint):
|
||||||
|
|
||||||
constraint = WeightClip(0.992)
|
constraint = WeightClip(0.992)
|
||||||
|
|
||||||
def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False, quantize=False):
|
def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, training=False, adaptation=False, quantize=False):
|
||||||
pcm = Input(shape=(None, 3))
|
pcm = Input(shape=(None, 3))
|
||||||
feat = Input(shape=(None, nb_used_features))
|
feat = Input(shape=(None, nb_used_features))
|
||||||
pitch = Input(shape=(None, 1))
|
pitch = Input(shape=(None, 1))
|
||||||
|
|
|
@ -40,7 +40,7 @@ model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=
|
||||||
feature_file = sys.argv[1]
|
feature_file = sys.argv[1]
|
||||||
out_file = sys.argv[2]
|
out_file = sys.argv[2]
|
||||||
frame_size = model.frame_size
|
frame_size = model.frame_size
|
||||||
nb_features = 55
|
nb_features = 36
|
||||||
nb_used_features = model.nb_used_features
|
nb_used_features = model.nb_used_features
|
||||||
|
|
||||||
features = np.fromfile(feature_file, dtype='float32')
|
features = np.fromfile(feature_file, dtype='float32')
|
||||||
|
@ -50,12 +50,11 @@ feature_chunk_size = features.shape[0]
|
||||||
pcm_chunk_size = frame_size*feature_chunk_size
|
pcm_chunk_size = frame_size*feature_chunk_size
|
||||||
|
|
||||||
features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
|
features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
|
||||||
features[:,:,18:36] = 0
|
periods = (.1 + 50*features[:,:,18:19]+100).astype('int16')
|
||||||
periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
model.load_weights('lpcnet34bq17_384_01.h5')
|
model.load_weights('lpcnet38Sn_384_02.h5');
|
||||||
|
|
||||||
order = 16
|
order = 16
|
||||||
|
|
||||||
|
@ -81,7 +80,7 @@ for c in range(0, nb_frames):
|
||||||
|
|
||||||
p, state1, state2 = dec.predict([fexc, cfeat[:, fr:fr+1, :], state1, state2])
|
p, state1, state2 = dec.predict([fexc, cfeat[:, fr:fr+1, :], state1, state2])
|
||||||
#Lower the temperature for voiced frames to reduce noisiness
|
#Lower the temperature for voiced frames to reduce noisiness
|
||||||
p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 37] - .5))
|
p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 19] - .5))
|
||||||
p = p/(1e-18 + np.sum(p))
|
p = p/(1e-18 + np.sum(p))
|
||||||
#Cut off the tail of the remaining distribution
|
#Cut off the tail of the remaining distribution
|
||||||
p = np.maximum(p-0.002, 0).astype('float64')
|
p = np.maximum(p-0.002, 0).astype('float64')
|
||||||
|
|
|
@ -104,7 +104,7 @@ with strategy.scope():
|
||||||
feature_file = args.features
|
feature_file = args.features
|
||||||
pcm_file = args.data # 16 bit unsigned short PCM samples
|
pcm_file = args.data # 16 bit unsigned short PCM samples
|
||||||
frame_size = model.frame_size
|
frame_size = model.frame_size
|
||||||
nb_features = 55
|
nb_features = 36
|
||||||
nb_used_features = model.nb_used_features
|
nb_used_features = model.nb_used_features
|
||||||
feature_chunk_size = 15
|
feature_chunk_size = 15
|
||||||
pcm_chunk_size = frame_size*feature_chunk_size
|
pcm_chunk_size = frame_size*feature_chunk_size
|
||||||
|
@ -130,14 +130,13 @@ print("ulaw std = ", np.std(out_exc))
|
||||||
|
|
||||||
features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
|
features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
|
||||||
features = features[:, :, :nb_used_features]
|
features = features[:, :, :nb_used_features]
|
||||||
features[:,:,18:36] = 0
|
|
||||||
|
|
||||||
fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)
|
fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)
|
||||||
fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0)
|
fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0)
|
||||||
features = np.concatenate([fpad1, features, fpad2], axis=1)
|
features = np.concatenate([fpad1, features, fpad2], axis=1)
|
||||||
|
|
||||||
|
|
||||||
periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')
|
periods = (.1 + 50*features[:,:,18:19]+100).astype('int16')
|
||||||
#periods = np.minimum(periods, 255)
|
#periods = np.minimum(periods, 255)
|
||||||
|
|
||||||
# dump models to disk as we go
|
# dump models to disk as we go
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue