mirror of
https://github.com/xiph/opus.git
synced 2025-05-15 16:08:30 +00:00
Using opus_int16 instead of short in LPCNet code
This commit is contained in:
parent
abe817c3fc
commit
b64a89feca
7 changed files with 36 additions and 46 deletions
|
@ -73,7 +73,7 @@ void compute_noise(int *noise, float noise_std) {
|
|||
}
|
||||
}
|
||||
|
||||
static short float2short(float x)
|
||||
static opus_int16 float2short(float x)
|
||||
{
|
||||
int i;
|
||||
i = (int)floor(.5+x);
|
||||
|
@ -81,9 +81,9 @@ static short float2short(float x)
|
|||
}
|
||||
|
||||
|
||||
void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file) {
|
||||
void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
|
||||
int i;
|
||||
short data[2*FRAME_SIZE];
|
||||
opus_int16 data[2*FRAME_SIZE];
|
||||
for (i=0;i<FRAME_SIZE;i++) {
|
||||
float p=0;
|
||||
float e;
|
||||
|
@ -121,9 +121,9 @@ int main(int argc, char **argv) {
|
|||
FILE *f1;
|
||||
FILE *ffeat;
|
||||
FILE *fpcm=NULL;
|
||||
short pcm[FRAME_SIZE]={0};
|
||||
opus_int16 pcm[FRAME_SIZE]={0};
|
||||
int noisebuf[FRAME_SIZE]={0};
|
||||
short tmp[FRAME_SIZE] = {0};
|
||||
opus_int16 tmp[FRAME_SIZE] = {0};
|
||||
float savedX[FRAME_SIZE] = {0};
|
||||
float speech_gain=1;
|
||||
int last_silent = 1;
|
||||
|
@ -173,11 +173,11 @@ int main(int argc, char **argv) {
|
|||
int silent;
|
||||
size_t ret;
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
|
||||
ret = fread(tmp, sizeof(short), FRAME_SIZE, f1);
|
||||
ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
|
||||
if (feof(f1) || ret != FRAME_SIZE) {
|
||||
if (!training) break;
|
||||
rewind(f1);
|
||||
ret = fread(tmp, sizeof(short), FRAME_SIZE, f1);
|
||||
ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
|
||||
if (ret != FRAME_SIZE) {
|
||||
fprintf(stderr, "error reading\n");
|
||||
exit(1);
|
||||
|
@ -240,7 +240,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
process_single_frame(st, ffeat);
|
||||
if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
|
||||
/*if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);*/
|
||||
/*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
|
||||
for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
|
||||
old_speech_gain = speech_gain;
|
||||
count++;
|
||||
|
|
|
@ -232,7 +232,7 @@ void lpcnet_reset_signal(LPCNetState *lpcnet)
|
|||
RNN_CLEAR(lpcnet->nnet.gru_b_state, GRU_B_STATE_SIZE);
|
||||
}
|
||||
|
||||
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload)
|
||||
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -270,12 +270,12 @@ void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int
|
|||
}
|
||||
}
|
||||
|
||||
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *output, int N, int preload)
|
||||
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload)
|
||||
{
|
||||
run_frame_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->lpc, features);
|
||||
lpcnet_synthesize_tail_impl(lpcnet, output, N, preload);
|
||||
}
|
||||
|
||||
void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N) {
|
||||
void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N) {
|
||||
lpcnet_synthesize_impl(lpcnet, features, output, N, 0);
|
||||
}
|
||||
|
|
30
dnn/lpcnet.h
30
dnn/lpcnet.h
|
@ -27,7 +27,7 @@
|
|||
#ifndef _LPCNET_H_
|
||||
#define _LPCNET_H_
|
||||
|
||||
|
||||
#include "opus_types.h"
|
||||
|
||||
#define NB_FEATURES 20
|
||||
#define NB_TOTAL_FEATURES 36
|
||||
|
@ -73,10 +73,10 @@ void lpcnet_decoder_destroy(LPCNetDecState *st);
|
|||
/** Decodes a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8) into LPCNET_PACKET_SAMPLES samples (currently 640).
|
||||
* @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
|
||||
* @param [in] buf <tt>const unsigned char *</tt>: Compressed packet
|
||||
* @param [out] pcm <tt>short *</tt>: Decoded audio
|
||||
* @param [out] pcm <tt>opus_int16 *</tt>: Decoded audio
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, short *pcm);
|
||||
int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, opus_int16 *pcm);
|
||||
|
||||
|
||||
|
||||
|
@ -106,27 +106,19 @@ void lpcnet_encoder_destroy(LPCNetEncState *st);
|
|||
|
||||
/** Encodes LPCNET_PACKET_SAMPLES speech samples (currently 640) into a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8).
|
||||
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
|
||||
* @param [in] pcm <tt>short *</tt>: Input speech to be encoded
|
||||
* @param [in] pcm <tt>opus_int16 *</tt>: Input speech to be encoded
|
||||
* @param [out] buf <tt>unsigned char *</tt>: Compressed packet
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_encode(LPCNetEncState *st, const short *pcm, unsigned char *buf);
|
||||
|
||||
/** Compute features on LPCNET_PACKET_SAMPLES speech samples (currently 640) and output features for 4 10-ms frames at once.
|
||||
* @param [in] st <tt>LPCNetDecState*</tt>: Encoder state
|
||||
* @param [in] pcm <tt>short *</tt>: Input speech to be analyzed
|
||||
* @param [out] features <tt>float[4][NB_TOTAL_FEATURES]</tt>: Four feature vectors
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_compute_features(LPCNetEncState *st, const short *pcm, float features[4][NB_TOTAL_FEATURES]);
|
||||
int lpcnet_encode(LPCNetEncState *st, const opus_int16 *pcm, unsigned char *buf);
|
||||
|
||||
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
|
||||
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
|
||||
* @param [in] pcm <tt>short *</tt>: Input speech to be analyzed
|
||||
* @param [in] pcm <tt>opus_int16 *</tt>: Input speech to be analyzed
|
||||
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: One feature vector
|
||||
* @retval 0 Success
|
||||
*/
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]);
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]);
|
||||
|
||||
|
||||
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
|
||||
|
@ -164,11 +156,11 @@ void lpcnet_destroy(LPCNetState *st);
|
|||
/** Synthesizes speech from an LPCNet feature vector.
|
||||
* @param [in] st <tt>LPCNetState*</tt>: Synthesis state
|
||||
* @param [in] features <tt>const float *</tt>: LPCNet feature vector
|
||||
* @param [out] output <tt>short **</tt>: Synthesized speech
|
||||
* @param [out] output <tt>opus_int16 *</tt>: Synthesized speech
|
||||
* @param [in] N <tt>int</tt>: Number of samples to generate
|
||||
* @retval 0 Success
|
||||
*/
|
||||
void lpcnet_synthesize(LPCNetState *st, const float *features, short *output, int N);
|
||||
void lpcnet_synthesize(LPCNetState *st, const float *features, opus_int16 *output, int N);
|
||||
|
||||
|
||||
#define LPCNET_PLC_CAUSAL 0
|
||||
|
@ -184,9 +176,9 @@ LPCNetPLCState *lpcnet_plc_create(int options);
|
|||
|
||||
void lpcnet_plc_destroy(LPCNetPLCState *st);
|
||||
|
||||
int lpcnet_plc_update(LPCNetPLCState *st, short *pcm);
|
||||
int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm);
|
||||
|
||||
int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm);
|
||||
int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm);
|
||||
|
||||
void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features);
|
||||
|
||||
|
|
|
@ -158,7 +158,7 @@ int main(int argc, char **argv) {
|
|||
net = lpcnet_encoder_create();
|
||||
while (1) {
|
||||
float features[NB_TOTAL_FEATURES];
|
||||
short pcm[LPCNET_FRAME_SIZE];
|
||||
opus_int16 pcm[LPCNET_FRAME_SIZE];
|
||||
size_t ret;
|
||||
ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
|
||||
if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;
|
||||
|
@ -175,7 +175,7 @@ int main(int argc, char **argv) {
|
|||
while (1) {
|
||||
float in_features[NB_TOTAL_FEATURES];
|
||||
float features[NB_FEATURES];
|
||||
short pcm[LPCNET_FRAME_SIZE];
|
||||
opus_int16 pcm[LPCNET_FRAME_SIZE];
|
||||
size_t ret;
|
||||
ret = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
|
||||
if (feof(fin) || ret != NB_TOTAL_FEATURES) break;
|
||||
|
@ -185,7 +185,7 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
lpcnet_destroy(net);
|
||||
} else if (mode == MODE_PLC) {
|
||||
short pcm[FRAME_SIZE];
|
||||
opus_int16 pcm[FRAME_SIZE];
|
||||
int count=0;
|
||||
int loss=0;
|
||||
int skip=0, extra=0;
|
||||
|
|
|
@ -233,7 +233,7 @@ static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *
|
|||
return 0;
|
||||
}
|
||||
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]) {
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]) {
|
||||
int i;
|
||||
float x[FRAME_SIZE];
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
|
||||
|
|
|
@ -175,10 +175,10 @@ void clear_state(LPCNetPLCState *st) {
|
|||
/* In this causal version of the code, the DNN model implemented by compute_plc_pred()
|
||||
needs to generate two feature vectors to conceal the first lost packet.*/
|
||||
|
||||
int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
|
||||
int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) {
|
||||
int i;
|
||||
float x[FRAME_SIZE];
|
||||
short output[FRAME_SIZE];
|
||||
opus_int16 output[FRAME_SIZE];
|
||||
float plc_features[2*NB_BANDS+NB_FEATURES+1];
|
||||
int delta = 0;
|
||||
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
|
||||
|
@ -186,7 +186,7 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
|
|||
if (st->skip_analysis) {
|
||||
/*fprintf(stderr, "skip update\n");*/
|
||||
if (st->blend) {
|
||||
short tmp[FRAME_SIZE-TRAINING_OFFSET];
|
||||
opus_int16 tmp[FRAME_SIZE-TRAINING_OFFSET];
|
||||
float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
|
||||
RNN_COPY(zeros, plc_features, 2*NB_BANDS);
|
||||
zeros[2*NB_BANDS+NB_FEATURES] = 1;
|
||||
|
@ -263,9 +263,9 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
|
|||
}
|
||||
|
||||
static const float att_table[10] = {0, 0, -.2, -.2, -.4, -.4, -.8, -.8, -1.6, -1.6};
|
||||
int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
|
||||
int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) {
|
||||
int i;
|
||||
short output[FRAME_SIZE];
|
||||
opus_int16 output[FRAME_SIZE];
|
||||
run_frame_network_flush(&st->lpcnet);
|
||||
/* If we concealed the previous frame, finish synthesizing the rest of the samples. */
|
||||
/* FIXME: Copy/predict features. */
|
||||
|
|
|
@ -77,7 +77,7 @@ struct LPCNetPLCState {
|
|||
int fec_read_pos;
|
||||
int fec_fill_pos;
|
||||
int fec_skip;
|
||||
short pcm[PLC_BUF_SIZE+FRAME_SIZE];
|
||||
opus_int16 pcm[PLC_BUF_SIZE+FRAME_SIZE];
|
||||
int pcm_fill;
|
||||
int skip_analysis;
|
||||
int blend;
|
||||
|
@ -91,19 +91,17 @@ void preemphasis(float *y, float *mem, const float *x, float coef, int N);
|
|||
|
||||
void compute_frame_features(LPCNetEncState *st, const float *in);
|
||||
|
||||
void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]);
|
||||
|
||||
void lpcnet_reset_signal(LPCNetState *lpcnet);
|
||||
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
|
||||
void run_frame_network_deferred(LPCNetState *lpcnet, const float *features);
|
||||
void run_frame_network_flush(LPCNetState *lpcnet);
|
||||
|
||||
|
||||
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload);
|
||||
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *output, int N, int preload);
|
||||
void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const short *pcm_in, short *output, int N);
|
||||
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload);
|
||||
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload);
|
||||
void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const opus_int16 *pcm_in, opus_int16 *output, int N);
|
||||
void process_single_frame(LPCNetEncState *st, FILE *ffeat);
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]);
|
||||
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]);
|
||||
|
||||
void process_single_frame(LPCNetEncState *st, FILE *ffeat);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue