mirror of
https://github.com/xiph/opus.git
synced 2025-05-30 15:17:42 +00:00
Merge branch 'exp_analysis'
Conflicts: celt/celt_encoder.c
This commit is contained in:
commit
f96fc8cc83
10 changed files with 936 additions and 196 deletions
|
@ -1329,7 +1329,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
|
|||
|
||||
prefilter_tapset = st->tapset_decision;
|
||||
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes);
|
||||
if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
|
||||
if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3)
|
||||
&& (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
|
||||
pitch_change = 1;
|
||||
if (pf_on==0)
|
||||
|
@ -1353,15 +1353,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
|
|||
|
||||
isTransient = 0;
|
||||
shortBlocks = 0;
|
||||
if (st->complexity >= 1)
|
||||
{
|
||||
isTransient = transient_analysis(in, N+st->overlap, CC,
|
||||
&tf_estimate, &tf_chan);
|
||||
}
|
||||
if (LM>0 && ec_tell(enc)+3<=total_bits)
|
||||
{
|
||||
if (st->complexity >= 1)
|
||||
{
|
||||
isTransient = transient_analysis(in, N+st->overlap, CC,
|
||||
&tf_estimate, &tf_chan);
|
||||
if (isTransient)
|
||||
shortBlocks = M;
|
||||
}
|
||||
if (isTransient)
|
||||
shortBlocks = M;
|
||||
} else {
|
||||
isTransient = 0;
|
||||
}
|
||||
|
||||
ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */
|
||||
|
|
|
@ -148,8 +148,9 @@ extern "C" {
|
|||
#define OPUS_GET_GAIN_REQUEST 4045 /* Should have been 4035 */
|
||||
#define OPUS_SET_LSB_DEPTH_REQUEST 4036
|
||||
#define OPUS_GET_LSB_DEPTH_REQUEST 4037
|
||||
|
||||
#define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039
|
||||
#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040
|
||||
#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041
|
||||
|
||||
/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */
|
||||
|
||||
|
@ -185,6 +186,15 @@ extern "C" {
|
|||
#define OPUS_BANDWIDTH_SUPERWIDEBAND 1104 /**<12 kHz bandpass @hideinitializer*/
|
||||
#define OPUS_BANDWIDTH_FULLBAND 1105 /**<20 kHz bandpass @hideinitializer*/
|
||||
|
||||
#define OPUS_FRAMESIZE_ARG 5000 /**< Select frame size from the argument (default) */
|
||||
#define OPUS_FRAMESIZE_2_5_MS 5001 /**< Use 2.5 ms frames */
|
||||
#define OPUS_FRAMESIZE_5_MS 5002 /**< Use 5 ms frames */
|
||||
#define OPUS_FRAMESIZE_10_MS 5003 /**< Use 10 ms frames */
|
||||
#define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */
|
||||
#define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */
|
||||
#define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */
|
||||
#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */
|
||||
|
||||
/**@}*/
|
||||
|
||||
|
||||
|
@ -525,6 +535,32 @@ extern "C" {
|
|||
* @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate).
|
||||
* @hideinitializer */
|
||||
#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x)
|
||||
|
||||
/** Configures the encoder's use of variable duration frames.
|
||||
* When enabled, the encoder is free to use a shorter frame size than the one
|
||||
* requested in the opus_encode*() call. It is then the user's responsibility
|
||||
* to verify how much audio was encoded by checking the ToC byte of the encoded
|
||||
* packet. The part of the audio that was not encoded needs to be resent to the
|
||||
* encoder for the next call. Do not use this option unless you <b>really</b>
|
||||
* know what you are doing.
|
||||
* @see OPUS_GET_EXPERT_FRAME_DURATION
|
||||
* @param[in] x <tt>opus_int32</tt>: Allowed values:
|
||||
* <dl>
|
||||
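* <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
|
||||
* <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>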
|
||||
* </dl>
|
||||
* @hideinitializer */
|
||||
#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x)
|
||||
/** Gets the encoder's configured use of variable duration frames.
|
||||
* @see OPUS_SET_EXPERT_FRAME_DURATION
|
||||
* @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values:
|
||||
* <dl>
|
||||
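* <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd>
|
||||
* <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd>
|
||||
* <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd>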
|
||||
* </dl>
|
||||
* @hideinitializer */
|
||||
#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x)
|
||||
|
||||
/**@}*/
|
||||
|
||||
/** @defgroup opus_genericctls Generic CTLs
|
||||
|
|
230
src/analysis.c
230
src/analysis.c
|
@ -139,10 +139,56 @@ static inline float fast_atan2f(float y, float x) {
|
|||
}
|
||||
}
|
||||
|
||||
void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth)
|
||||
/* Copy out the stored analysis entry that applies to the next frame of `len`
   samples, advance the read cursor by the audio that frame consumes, and
   overwrite info_out->music_prob with a value derived from the pmusic[] and
   pspeech[] arrays and the current lookahead.

   tonal    : analysis state; read_pos and read_subframe are updated here.
   info_out : receives a copy of tonal->info[slot], then the music_prob above.
   len      : frame length in samples. */
void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
{
   int slot = tonal->read_pos;
   int lookahead = tonal->write_pos - tonal->read_pos;
   int k;
   float acc = 0;

   /* info[] is indexed modulo DETECT_SIZE, so the distance can wrap */
   if (lookahead < 0)
      lookahead += DETECT_SIZE;

   /* For frames longer than 480 samples, use the following entry when the
      read position has not yet caught up with the write position */
   if (len > 480 && slot != tonal->write_pos)
   {
      if (++slot == DETECT_SIZE)
         slot = 0;
   }
   /* write_pos is the entry that will be filled next; step back to the
      previous one, wrapping at the start of the array */
   if (slot == tonal->write_pos)
      slot--;
   if (slot < 0)
      slot = DETECT_SIZE-1;
   OPUS_COPY(info_out, &tonal->info[slot], 1);

   /* Progress is counted in units of 120 samples (presumably 2.5 ms at
      48 kHz -- TODO confirm); four units move read_pos on by one entry */
   tonal->read_subframe += len/120;
   for (; tonal->read_subframe >= 4; tonal->read_subframe -= 4)
      tonal->read_pos++;
   if (tonal->read_pos >= DETECT_SIZE)
      tonal->read_pos -= DETECT_SIZE;

   /* Compensate for the delay in the features themselves.
      FIXME: need a better estimate than the 10 I just made up */
   lookahead = IMAX(lookahead-10, 0);

   /* Sum pmusic[] up to the lookahead boundary and pspeech[] beyond it */
   for (k = 0; k < DETECT_SIZE-lookahead; k++)
      acc += tonal->pmusic[k];
   for (; k < DETECT_SIZE; k++)
      acc += tonal->pspeech[k];

   /* Map the sum onto the range between the two confidence levels */
   acc = acc*tonal->music_confidence + (1-acc)*tonal->speech_confidence;

   info_out->music_prob = acc;
}
|
||||
|
||||
void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
|
||||
{
|
||||
int i, b;
|
||||
const CELTMode *mode;
|
||||
const kiss_fft_state *kfft;
|
||||
kiss_fft_cpx in[480], out[480];
|
||||
int N = 480, N2=240;
|
||||
|
@ -163,14 +209,15 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
|||
float slope=0;
|
||||
float frame_stationarity;
|
||||
float relativeE;
|
||||
float frame_prob;
|
||||
float frame_probs[2];
|
||||
float alpha, alphaE, alphaE2;
|
||||
float frame_loudness;
|
||||
float bandwidth_mask;
|
||||
int bandwidth=0;
|
||||
float maxE = 0;
|
||||
float noise_floor;
|
||||
celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
|
||||
int remaining;
|
||||
AnalysisInfo *info;
|
||||
|
||||
tonal->last_transition++;
|
||||
alpha = 1.f/IMIN(20, 1+tonal->count);
|
||||
|
@ -179,27 +226,32 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
|||
|
||||
if (tonal->count<4)
|
||||
tonal->music_prob = .5;
|
||||
kfft = mode->mdct.kfft[0];
|
||||
if (C==1)
|
||||
kfft = celt_mode->mdct.kfft[0];
|
||||
if (tonal->count==0)
|
||||
tonal->mem_fill = 240;
|
||||
downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
|
||||
if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
|
||||
{
|
||||
for (i=0;i<N2;i++)
|
||||
{
|
||||
float w = analysis_window[i];
|
||||
in[i].r = MULT16_16(w, x[i]);
|
||||
in[i].i = MULT16_16(w, x[N-N2+i]);
|
||||
in[N-i-1].r = MULT16_16(w, x[N-i-1]);
|
||||
in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);
|
||||
}
|
||||
} else {
|
||||
for (i=0;i<N2;i++)
|
||||
{
|
||||
float w = analysis_window[i];
|
||||
in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);
|
||||
in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);
|
||||
in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
|
||||
in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);
|
||||
}
|
||||
tonal->mem_fill += len;
|
||||
/* Don't have enough to update the analysis */
|
||||
return;
|
||||
}
|
||||
info = &tonal->info[tonal->write_pos++];
|
||||
if (tonal->write_pos>=DETECT_SIZE)
|
||||
tonal->write_pos-=DETECT_SIZE;
|
||||
|
||||
for (i=0;i<N2;i++)
|
||||
{
|
||||
float w = analysis_window[i];
|
||||
in[i].r = MULT16_16(w, tonal->inmem[i]);
|
||||
in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
|
||||
in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
|
||||
in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
|
||||
}
|
||||
OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
|
||||
remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
|
||||
downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
|
||||
tonal->mem_fill = 240 + remaining;
|
||||
opus_fft(kfft, in, out);
|
||||
|
||||
for (i=1;i<N2;i++)
|
||||
|
@ -417,27 +469,91 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
|||
features[24] = tonal->lowECount;
|
||||
|
||||
#ifndef FIXED_POINT
|
||||
mlp_process(&net, features, &frame_prob);
|
||||
frame_prob = .5f*(frame_prob+1);
|
||||
mlp_process(&net, features, frame_probs);
|
||||
frame_probs[0] = .5f*(frame_probs[0]+1);
|
||||
/* Curve fitting between the MLP probability and the actual probability */
|
||||
frame_prob = .01f + 1.21f*frame_prob*frame_prob - .23f*(float)pow(frame_prob, 10);
|
||||
frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
|
||||
frame_probs[1] = .5*frame_probs[1]+.5;
|
||||
frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5;
|
||||
|
||||
/*printf("%f\n", frame_prob);*/
|
||||
/*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
|
||||
{
|
||||
float tau, beta;
|
||||
float p0, p1;
|
||||
float max_certainty;
|
||||
/* One transition every 3 minutes */
|
||||
tau = .00005f;
|
||||
beta = .1f;
|
||||
max_certainty = .01f+1.f/(20.f+.5f*tonal->last_transition);
|
||||
tau = .00005f*frame_probs[1];
|
||||
beta = .05f;
|
||||
if (1) {
|
||||
/* Adapt beta based on how "unexpected" the new prob is */
|
||||
float p, q;
|
||||
p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
|
||||
q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
|
||||
beta = .01+.05*ABS16(p-q)/(p*(1-q)+q*(1-p));
|
||||
}
|
||||
p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau;
|
||||
p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
|
||||
p0 *= (float)pow(1-frame_prob, beta);
|
||||
p1 *= (float)pow(frame_prob, beta);
|
||||
tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
|
||||
p0 *= (float)pow(1-frame_probs[0], beta);
|
||||
p1 *= (float)pow(frame_probs[0], beta);
|
||||
tonal->music_prob = p1/(p0+p1);
|
||||
info->music_prob = tonal->music_prob;
|
||||
/*printf("%f %f\n", frame_prob, info->music_prob);*/
|
||||
|
||||
float psum=1e-20;
|
||||
float speech0 = (float)pow(1-frame_probs[0], beta);
|
||||
float music0 = (float)pow(frame_probs[0], beta);
|
||||
if (tonal->count==1)
|
||||
{
|
||||
tonal->pspeech[0]=.5;
|
||||
tonal->pmusic [0]=.5;
|
||||
}
|
||||
float s0, m0;
|
||||
s0 = tonal->pspeech[0] + tonal->pspeech[1];
|
||||
m0 = tonal->pmusic [0] + tonal->pmusic [1];
|
||||
tonal->pspeech[0] = s0*(1-tau)*speech0;
|
||||
tonal->pmusic [0] = m0*(1-tau)*music0;
|
||||
for (i=1;i<DETECT_SIZE-1;i++)
|
||||
{
|
||||
tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
|
||||
tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
|
||||
}
|
||||
tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
|
||||
tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
|
||||
|
||||
for (i=0;i<DETECT_SIZE;i++)
|
||||
psum += tonal->pspeech[i] + tonal->pmusic[i];
|
||||
psum = 1.f/psum;
|
||||
for (i=0;i<DETECT_SIZE;i++)
|
||||
{
|
||||
tonal->pspeech[i] *= psum;
|
||||
tonal->pmusic [i] *= psum;
|
||||
}
|
||||
psum = tonal->pmusic[0];
|
||||
for (i=1;i<DETECT_SIZE;i++)
|
||||
psum += tonal->pspeech[i];
|
||||
|
||||
/* Estimate our confidence in the speech/music decisions */
|
||||
if (frame_probs[1]>.75)
|
||||
{
|
||||
if (tonal->music_prob>.9)
|
||||
{
|
||||
float adapt;
|
||||
adapt = 1.f/(++tonal->music_confidence_count);
|
||||
tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
|
||||
tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
|
||||
}
|
||||
if (tonal->music_prob<.1)
|
||||
{
|
||||
float adapt;
|
||||
adapt = 1.f/(++tonal->speech_confidence_count);
|
||||
tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
|
||||
tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
|
||||
}
|
||||
} else {
|
||||
if (tonal->music_confidence_count==0)
|
||||
tonal->music_confidence = .9;
|
||||
if (tonal->speech_confidence_count==0)
|
||||
tonal->speech_confidence = .1;
|
||||
}
|
||||
psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psum));
|
||||
}
|
||||
if (tonal->last_music != (tonal->music_prob>.5f))
|
||||
tonal->last_transition=0;
|
||||
|
@ -465,4 +581,48 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
|
|||
/*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
|
||||
info->noisiness = frame_noisiness;
|
||||
info->valid = 1;
|
||||
if (info_out!=NULL)
|
||||
OPUS_COPY(info_out, info, 1);
|
||||
}
|
||||
|
||||
/* Feed the not-yet-analysed part of the input to the tonality analysis,
   choose the duration of the frame to encode, and fetch the analysis result
   for that frame.

   analysis           : analysis state, updated in place.
   celt_mode          : forwarded to tonality_analysis().
   pcm                : input forwarded to optimize_framesize().
   analysis_pcm       : input forwarded to tonality_analysis() through downmix.
   frame_size         : number of samples available in this call.
   variable_duration  : one of the OPUS_FRAMESIZE_* values.
   C, Fs, bitrate_bps,
   delay_compensation,
   lsb_depth, downmix : forwarded to the helpers called below.
   analysis_info      : receives the result of tonality_get_info().

   Returns the selected frame size in samples, or -1 if the frame size
   selection yields a negative value. */
int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
                 const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
                 int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
{
   int offset;
   int pcm_len;

   /* Avoid overflow/wrap-around of the analysis buffer */
   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);

   /* analysis_offset is the number of samples that were analysed by an
      earlier call but not consumed by the frame size it returned (see the
      bookkeeping below). Only the samples after them are new, so start
      reading there; starting at 0 would analyse the old samples a second
      time and never reach the newest ones.
      NOTE(review): assumes the caller re-submits the unconsumed samples at
      the start of analysis_pcm -- confirm against the encoder. */
   pcm_len = frame_size - analysis->analysis_offset;
   offset = analysis->analysis_offset;
   do {
      /* The analysis is fed in chunks of at most 480 samples */
      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
      offset += 480;
      pcm_len -= 480;
   } while (pcm_len>0);
   analysis->analysis_offset = frame_size;

   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
   {
      int LM = 3;
      LM = optimize_framesize(pcm, frame_size, C, Fs, bitrate_bps,
            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
      /* Never pick a frame longer than the audio that was supplied */
      while ((Fs/400<<LM)>frame_size)
         LM--;
      frame_size = (Fs/400<<LM);
   } else {
      frame_size = frame_size_select(frame_size, variable_duration, Fs);
   }
   if (frame_size<0)
      return -1;
   /* What remains is the amount analysed beyond the frame being returned */
   analysis->analysis_offset -= frame_size;

   /* Only perform analysis up to 20-ms frames. Longer ones will be split if
      they're in CELT-only mode. */
   /* tonality_get_info() copies a stored entry over *analysis_info, so the
      valid flag ends up being whatever that entry holds */
   analysis_info->valid = 0;
   tonality_get_info(analysis, analysis_info, frame_size);

   return frame_size;
}
|
||||
|
|
|
@ -28,18 +28,27 @@
|
|||
#ifndef ANALYSIS_H
|
||||
#define ANALYSIS_H
|
||||
|
||||
#include "celt.h"
|
||||
#include "opus_private.h"
|
||||
|
||||
#define NB_FRAMES 8
|
||||
#define NB_TBANDS 18
|
||||
#define NB_TOT_BANDS 21
|
||||
#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
|
||||
|
||||
#define DETECT_SIZE 200
|
||||
|
||||
typedef struct {
|
||||
float angle[240];
|
||||
float d_angle[240];
|
||||
float d2_angle[240];
|
||||
float inmem[ANALYSIS_BUF_SIZE];
|
||||
int mem_fill; /* number of usable samples in the buffer */
|
||||
float prev_band_tonality[NB_TBANDS];
|
||||
float prev_tonality;
|
||||
float E[NB_FRAMES][NB_TBANDS];
|
||||
float lowE[NB_TBANDS], highE[NB_TBANDS];
|
||||
float lowE[NB_TBANDS];
|
||||
float highE[NB_TBANDS];
|
||||
float meanE[NB_TOT_BANDS];
|
||||
float mem[32];
|
||||
float cmean[8];
|
||||
|
@ -52,9 +61,27 @@ typedef struct {
|
|||
int last_transition;
|
||||
int count;
|
||||
int opus_bandwidth;
|
||||
opus_val32 subframe_mem[3];
|
||||
int analysis_offset;
|
||||
float pspeech[DETECT_SIZE];
|
||||
float pmusic[DETECT_SIZE];
|
||||
float speech_confidence;
|
||||
float music_confidence;
|
||||
int speech_confidence_count;
|
||||
int music_confidence_count;
|
||||
int write_pos;
|
||||
int read_pos;
|
||||
int read_subframe;
|
||||
AnalysisInfo info[DETECT_SIZE];
|
||||
} TonalityAnalysisState;
|
||||
|
||||
void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
|
||||
CELTEncoder *celt_enc, const opus_val16 *x, int C, int lsb_depth);
|
||||
const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);
|
||||
|
||||
void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
|
||||
|
||||
int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
|
||||
const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
|
||||
int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
|
||||
|
||||
#endif
|
||||
|
|
153
src/mlp_data.c
153
src/mlp_data.c
|
@ -3,74 +3,103 @@
|
|||
|
||||
#include "mlp.h"
|
||||
|
||||
/* RMS error was 0.179835, seed was 1322103961 */
|
||||
/* RMS error was 0.138320, seed was 1361535663 */
|
||||
|
||||
static const float weights[271] = {
|
||||
static const float weights[422] = {
|
||||
|
||||
/* hidden layer */
|
||||
1.55597f, -0.0739792f, -0.0646761f, -0.099531f, -0.0794943f,
|
||||
0.0180174f, -0.0391354f, 0.0508224f, -0.0160169f, -0.0773263f,
|
||||
-0.0300002f, -0.0865361f, 0.124477f, -0.28648f, -0.0860702f,
|
||||
-0.518949f, -0.0873341f, -0.235393f, -0.907833f, -0.383573f,
|
||||
0.535388f, -0.57944f, 0.98116f, 0.8482f, 1.12426f,
|
||||
-3.23721f, -0.647072f, -0.0265139f, 0.0711052f, -0.00125666f,
|
||||
-0.0396181f, -0.44282f, -0.510495f, -0.201865f, 0.0134336f,
|
||||
-0.167205f, -0.155406f, 0.00041678f, -0.00468705f, -0.0233224f,
|
||||
0.264279f, -0.301375f, 0.00234895f, 0.0144741f, -0.137535f,
|
||||
0.200323f, 0.0192027f, 3.19818f, 2.03495f, 0.705517f,
|
||||
-4.6025f, -0.11485f, -0.792716f, 0.150714f, 0.10608f,
|
||||
0.240633f, 0.0690698f, 0.0695297f, 0.124819f, 0.0501433f,
|
||||
0.0460952f, 0.147639f, 0.10327f, 0.158007f, 0.113714f,
|
||||
0.0276191f, 0.0680749f, -0.130012f, 0.0796126f, 0.133067f,
|
||||
0.51495f, 0.747578f, -0.128742f, 5.98112f, -1.16698f,
|
||||
-0.276492f, -1.73549f, -3.90234f, 2.01489f, -0.040118f,
|
||||
-0.113002f, -0.146751f, -0.113569f, 0.0534873f, 0.0989832f,
|
||||
0.0872875f, 0.049266f, 0.0367557f, -0.00889148f, -0.0648461f,
|
||||
-0.00190352f, 0.0143773f, 0.0259364f, -0.0592133f, -0.0672924f,
|
||||
0.1399f, -0.0987886f, -0.347402f, 0.101326f, -0.0680876f,
|
||||
0.469186f, 0.246922f, 10.4017f, 3.44846f, -0.662725f,
|
||||
-0.0328208f, -0.0561274f, -0.0167744f, 0.00044282f, -0.0457645f,
|
||||
-0.0408314f, -0.013113f, -0.0373873f, -0.0474122f, -0.0273745f,
|
||||
-0.0308505f, 0.000582959f, -0.0421135f, 0.464859f, 0.196842f,
|
||||
0.320538f, 0.0435528f, -0.200168f, 0.266475f, -0.0853727f,
|
||||
1.20397f, 0.711542f, -1.04397f, -1.47759f, 1.26768f,
|
||||
0.446958f, 0.266477f, -0.30802f, 0.28431f, -0.118541f,
|
||||
0.00836345f, 0.0689026f, -0.0137996f, -0.0395417f, 0.26982f,
|
||||
-0.206255f, 0.16066f, 0.114757f, 0.359587f, -0.106503f,
|
||||
-0.0948534f, 0.175358f, -0.122966f, -0.0056675f, 0.483848f,
|
||||
-0.134916f, -0.427567f, -0.140172f, -1.0866f, -2.73921f,
|
||||
0.549843f, 0.17685f, 0.0010675f, -0.00137386f, 0.0884424f,
|
||||
-0.0698736f, -0.00174136f, 0.0718775f, -0.0396849f, 0.0448056f,
|
||||
0.0577853f, -0.0372353f, 0.134599f, 0.0260656f, 0.140322f,
|
||||
0.22704f, -0.020568f, -0.0142424f, -0.21723f, -0.997704f,
|
||||
-0.884573f, -0.163495f, 2.33617f, 0.224142f, 0.19635f,
|
||||
-0.957387f, 0.144678f, 1.47035f, -0.00700498f, -0.0472309f,
|
||||
-0.0137848f, -0.0189145f, 0.00856479f, 0.0316965f, 0.00613373f,
|
||||
0.00209807f, 0.00270964f, -0.0490206f, 0.0105712f, -0.0465045f,
|
||||
-0.0381532f, -0.0985268f, -0.108297f, 0.0146409f, -0.0040718f,
|
||||
-0.0698572f, -0.380568f, -0.230479f, 3.98917f, 0.457652f,
|
||||
-1.02355f, -7.4435f, -0.475314f, 1.61743f, 0.0254017f,
|
||||
-0.00791293f, 0.047217f, 0.0220995f, -0.0304311f, 0.0052168f,
|
||||
-0.0404054f, -0.0230293f, 0.00169229f, -0.0138178f, 0.0043137f,
|
||||
-0.0598088f, -0.133601f, 0.0555138f, -0.177358f, -0.159856f,
|
||||
-0.137281f, 0.108051f, -0.305973f, 0.393775f, 0.0747287f,
|
||||
0.783993f, -0.875086f, 1.06862f, 0.340519f, -0.352681f,
|
||||
-0.0830912f, -0.100017f, 0.0729085f, -0.00829403f, 0.027489f,
|
||||
-0.0779597f, 0.082286f, -0.164181f, -0.41519f, 0.00282335f,
|
||||
-0.29573f, 0.125571f, 0.726935f, 0.392137f, 0.491348f,
|
||||
0.0723196f, -0.0259758f, -0.0636332f, -0.452384f, -0.000225974f,
|
||||
-2.34001f, 2.45211f, -0.544628f, 5.62944f, -3.44507f,
|
||||
-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f,
|
||||
-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f,
|
||||
-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f,
|
||||
0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f,
|
||||
0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f,
|
||||
24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f,
|
||||
-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f,
|
||||
-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f,
|
||||
-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f,
|
||||
1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f,
|
||||
15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f,
|
||||
0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f,
|
||||
-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f,
|
||||
0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f,
|
||||
0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f,
|
||||
-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f,
|
||||
-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f,
|
||||
-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f,
|
||||
0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f,
|
||||
-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f,
|
||||
2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f,
|
||||
0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f,
|
||||
-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f,
|
||||
0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f,
|
||||
0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f,
|
||||
-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f,
|
||||
5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f,
|
||||
-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f,
|
||||
-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f,
|
||||
-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f,
|
||||
1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f,
|
||||
-7.31036f, 2.44776f, -0.00111802f, -0.0632786f, -0.0376296f,
|
||||
-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f,
|
||||
0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f,
|
||||
0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f,
|
||||
-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f,
|
||||
10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f,
|
||||
-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f,
|
||||
-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f,
|
||||
-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f,
|
||||
0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f,
|
||||
-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f,
|
||||
0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f,
|
||||
0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f,
|
||||
-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f,
|
||||
0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f,
|
||||
-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f,
|
||||
-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f,
|
||||
-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f,
|
||||
-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f,
|
||||
-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f,
|
||||
5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f,
|
||||
1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f,
|
||||
0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f,
|
||||
-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f,
|
||||
0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f,
|
||||
-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f,
|
||||
-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f,
|
||||
0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f,
|
||||
-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f,
|
||||
-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f,
|
||||
0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f,
|
||||
-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f,
|
||||
0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f,
|
||||
-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f,
|
||||
-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f,
|
||||
0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f,
|
||||
-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f,
|
||||
0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f,
|
||||
-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f,
|
||||
0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f,
|
||||
-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f,
|
||||
4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f,
|
||||
0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f,
|
||||
-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f,
|
||||
0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f,
|
||||
0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f,
|
||||
3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f,
|
||||
|
||||
/* output layer */
|
||||
-3.13835f, 0.994751f, 0.444901f, 1.59518f, 1.23665f,
|
||||
3.37012f, -1.34606f, 1.99131f, 1.33476f, 1.3885f,
|
||||
1.12559f, };
|
||||
-0.381439, 0.12115, -0.906927, 2.93878, 1.6388,
|
||||
0.882811, 0.874344, 1.21726, -0.874545, 0.321706,
|
||||
0.785055, 0.946558, -0.575066, -3.46553, 0.884905,
|
||||
0.0924047, -9.90712, 0.391338, 0.160103, -2.04954,
|
||||
4.1455, 0.0684029, -0.144761, -0.285282, 0.379244,
|
||||
-1.1584, -0.0277241, -9.85, -4.82386, 3.71333,
|
||||
3.87308, 3.52558, };
|
||||
|
||||
static const int topo[3] = {25, 10, 1};
|
||||
static const int topo[3] = {25, 15, 2};
|
||||
|
||||
const MLP net = {
|
||||
3,
|
||||
topo,
|
||||
weights
|
||||
3,
|
||||
topo,
|
||||
weights
|
||||
};
|
||||
|
||||
|
|
|
@ -106,6 +106,7 @@ MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int
|
|||
}
|
||||
|
||||
#define MAX_NEURONS 100
|
||||
#define MAX_OUT 10
|
||||
|
||||
double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate)
|
||||
{
|
||||
|
@ -120,7 +121,8 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
|
|||
double netOut[MAX_NEURONS];
|
||||
double error[MAX_NEURONS];
|
||||
|
||||
*error_rate = 0;
|
||||
for (i=0;i<outDim;i++)
|
||||
error_rate[i] = 0;
|
||||
topo = net->topo;
|
||||
inDim = net->topo[0];
|
||||
hiddenDim = net->topo[1];
|
||||
|
@ -153,7 +155,7 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
|
|||
netOut[i] = tansig_approx(sum);
|
||||
error[i] = out[i] - netOut[i];
|
||||
rms += error[i]*error[i];
|
||||
*error_rate += fabs(error[i])>1;
|
||||
error_rate[i] += fabs(error[i])>1;
|
||||
/*error[i] = error[i]/(1+fabs(error[i]));*/
|
||||
}
|
||||
/* Back-propagate error */
|
||||
|
@ -194,7 +196,7 @@ struct GradientArg {
|
|||
double *W0_grad;
|
||||
double *W1_grad;
|
||||
double rms;
|
||||
double error_rate;
|
||||
double error_rate[MAX_OUT];
|
||||
};
|
||||
|
||||
void *gradient_thread_process(void *_arg)
|
||||
|
@ -213,7 +215,7 @@ void *gradient_thread_process(void *_arg)
|
|||
sem_wait(&sem_begin[arg->id]);
|
||||
if (arg->done)
|
||||
break;
|
||||
arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, &arg->error_rate);
|
||||
arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, arg->error_rate);
|
||||
sem_post(&sem_end[arg->id]);
|
||||
}
|
||||
fprintf(stderr, "done\n");
|
||||
|
@ -295,7 +297,7 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam
|
|||
for (e=0;e<nbEpoch;e++)
|
||||
{
|
||||
double rms=0;
|
||||
double error_rate = 0;
|
||||
double error_rate[2] = {0,0};
|
||||
for (i=0;i<NB_THREADS;i++)
|
||||
{
|
||||
sem_post(&sem_begin[i]);
|
||||
|
@ -306,7 +308,8 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam
|
|||
{
|
||||
sem_wait(&sem_end[i]);
|
||||
rms += args[i].rms;
|
||||
error_rate += args[i].error_rate;
|
||||
error_rate[0] += args[i].error_rate[0];
|
||||
error_rate[1] += args[i].error_rate[1];
|
||||
for (j=0;j<W0_size;j++)
|
||||
W0_grad[j] += args[i].W0_grad[j];
|
||||
for (j=0;j<W1_size;j++)
|
||||
|
@ -315,8 +318,9 @@ float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSam
|
|||
|
||||
float mean_rate = 0, min_rate = 1e10;
|
||||
rms = (rms/(outDim*nbSamples));
|
||||
error_rate = (error_rate/(outDim*nbSamples));
|
||||
fprintf (stderr, "%f (%f %f) ", error_rate, rms, best_rms);
|
||||
error_rate[0] = (error_rate[0]/(nbSamples));
|
||||
error_rate[1] = (error_rate[1]/(nbSamples));
|
||||
fprintf (stderr, "%f %f (%f %f) ", error_rate[0], error_rate[1], rms, best_rms);
|
||||
if (rms < best_rms)
|
||||
{
|
||||
best_rms = rms;
|
||||
|
@ -445,6 +449,7 @@ int main(int argc, char **argv)
|
|||
outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs));
|
||||
|
||||
seed = time(NULL);
|
||||
/*seed = 1361480659;*/
|
||||
fprintf (stderr, "Seed is %u\n", seed);
|
||||
srand(seed);
|
||||
build_tansig_table();
|
||||
|
|
|
@ -53,6 +53,7 @@ void print_usage( char* argv[] )
|
|||
fprintf(stderr, "-d : only runs the decoder (reads the bit-stream as input)\n" );
|
||||
fprintf(stderr, "-cbr : enable constant bitrate; default: variable bitrate\n" );
|
||||
fprintf(stderr, "-cvbr : enable constrained variable bitrate; default: unconstrained\n" );
|
||||
fprintf(stderr, "-variable-duration : enable frames of variable duration (experts only); default: disabled\n" );
|
||||
fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" );
|
||||
fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" );
|
||||
fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1024\n" );
|
||||
|
@ -221,6 +222,8 @@ int main(int argc, char *argv[])
|
|||
short *in, *out;
|
||||
int application=OPUS_APPLICATION_AUDIO;
|
||||
double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg;
|
||||
double tot_samples=0;
|
||||
opus_uint64 tot_in, tot_out;
|
||||
int bandwidth=-1;
|
||||
const char *bandwidth_string;
|
||||
int lost = 0, lost_prev = 1;
|
||||
|
@ -239,6 +242,10 @@ int main(int argc, char *argv[])
|
|||
int curr_mode=0;
|
||||
int curr_mode_count=0;
|
||||
int mode_switch_time = 48000;
|
||||
int nb_encoded;
|
||||
int remaining=0;
|
||||
int variable_duration=OPUS_FRAMESIZE_ARG;
|
||||
int delayed_decision=0;
|
||||
|
||||
if (argc < 5 )
|
||||
{
|
||||
|
@ -246,6 +253,7 @@ int main(int argc, char *argv[])
|
|||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
tot_in=tot_out=0;
|
||||
fprintf(stderr, "%s\n", opus_get_version_string());
|
||||
|
||||
args = 1;
|
||||
|
@ -306,7 +314,7 @@ int main(int argc, char *argv[])
|
|||
forcechannels = OPUS_AUTO;
|
||||
use_dtx = 0;
|
||||
packet_loss_perc = 0;
|
||||
max_frame_size = 960*6;
|
||||
max_frame_size = 2*48000;
|
||||
curr_read=0;
|
||||
|
||||
while( args < argc - 2 ) {
|
||||
|
@ -374,6 +382,14 @@ int main(int argc, char *argv[])
|
|||
check_encoder_option(decode_only, "-cvbr");
|
||||
cvbr = 1;
|
||||
args++;
|
||||
} else if( strcmp( argv[ args ], "-variable-duration" ) == 0 ) {
|
||||
check_encoder_option(decode_only, "-variable-duration");
|
||||
variable_duration = OPUS_FRAMESIZE_VARIABLE;
|
||||
args++;
|
||||
} else if( strcmp( argv[ args ], "-delayed-decision" ) == 0 ) {
|
||||
check_encoder_option(decode_only, "-delayed-decision");
|
||||
delayed_decision = 1;
|
||||
args++;
|
||||
} else if( strcmp( argv[ args ], "-dtx") == 0 ) {
|
||||
check_encoder_option(decode_only, "-dtx");
|
||||
use_dtx = 1;
|
||||
|
@ -499,6 +515,7 @@ int main(int argc, char *argv[])
|
|||
|
||||
opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&skip));
|
||||
opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(16));
|
||||
opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
|
||||
}
|
||||
if (!encode_only)
|
||||
{
|
||||
|
@ -554,6 +571,26 @@ int main(int argc, char *argv[])
|
|||
if ( use_inbandfec ) {
|
||||
data[1] = (unsigned char*)calloc(max_payload_bytes,sizeof(char));
|
||||
}
|
||||
if(delayed_decision)
|
||||
{
|
||||
if (variable_duration!=OPUS_FRAMESIZE_VARIABLE)
|
||||
{
|
||||
if (frame_size==sampling_rate/400)
|
||||
variable_duration = OPUS_FRAMESIZE_2_5_MS;
|
||||
else if (frame_size==sampling_rate/200)
|
||||
variable_duration = OPUS_FRAMESIZE_5_MS;
|
||||
else if (frame_size==sampling_rate/100)
|
||||
variable_duration = OPUS_FRAMESIZE_10_MS;
|
||||
else if (frame_size==sampling_rate/50)
|
||||
variable_duration = OPUS_FRAMESIZE_20_MS;
|
||||
else if (frame_size==sampling_rate/25)
|
||||
variable_duration = OPUS_FRAMESIZE_40_MS;
|
||||
else
|
||||
variable_duration = OPUS_FRAMESIZE_60_MS;
|
||||
opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration));
|
||||
}
|
||||
frame_size = 2*48000;
|
||||
}
|
||||
while (!stop)
|
||||
{
|
||||
if (delayed_celt)
|
||||
|
@ -617,22 +654,28 @@ int main(int argc, char *argv[])
|
|||
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3]));
|
||||
frame_size = mode_list[curr_mode][2];
|
||||
}
|
||||
err = fread(fbytes, sizeof(short)*channels, frame_size, fin);
|
||||
err = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin);
|
||||
curr_read = err;
|
||||
tot_in += curr_read;
|
||||
for(i=0;i<curr_read*channels;i++)
|
||||
{
|
||||
opus_int32 s;
|
||||
s=fbytes[2*i+1]<<8|fbytes[2*i];
|
||||
s=((s&0xFFFF)^0x8000)-0x8000;
|
||||
in[i]=s;
|
||||
in[i+remaining*channels]=s;
|
||||
}
|
||||
if (curr_read < frame_size)
|
||||
if (curr_read+remaining < frame_size)
|
||||
{
|
||||
for (i=curr_read*channels;i<frame_size*channels;i++)
|
||||
for (i=(curr_read+remaining)*channels;i<frame_size*channels;i++)
|
||||
in[i] = 0;
|
||||
stop = 1;
|
||||
if (encode_only || decode_only)
|
||||
stop = 1;
|
||||
}
|
||||
len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
|
||||
nb_encoded = opus_packet_get_samples_per_frame(data[toggle], sampling_rate)*opus_packet_get_nb_frames(data[toggle], len[toggle]);
|
||||
remaining = frame_size-nb_encoded;
|
||||
for(i=0;i<remaining*channels;i++)
|
||||
in[i] = in[nb_encoded*channels+i];
|
||||
if (sweep_bps!=0)
|
||||
{
|
||||
bitrate_bps += sweep_bps;
|
||||
|
@ -681,6 +724,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, "Error writing.\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
tot_samples += nb_encoded;
|
||||
} else {
|
||||
int output_samples;
|
||||
lost = len[toggle]==0 || (packet_loss_perc>0 && rand()%100 < packet_loss_perc);
|
||||
|
@ -703,6 +747,11 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
if (output_samples>0)
|
||||
{
|
||||
if (!decode_only && tot_out + output_samples > tot_in)
|
||||
{
|
||||
stop=1;
|
||||
output_samples = tot_in-tot_out;
|
||||
}
|
||||
if (output_samples>skip) {
|
||||
int i;
|
||||
for(i=0;i<(output_samples-skip)*channels;i++)
|
||||
|
@ -716,6 +765,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, "Error writing.\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
tot_out += output_samples-skip;
|
||||
}
|
||||
if (output_samples<skip) skip -= output_samples;
|
||||
else skip = 0;
|
||||
|
@ -723,6 +773,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, "error decoding frame: %s\n",
|
||||
opus_strerror(output_samples));
|
||||
}
|
||||
tot_samples += output_samples;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -767,7 +818,7 @@ int main(int argc, char *argv[])
|
|||
toggle = (toggle + use_inbandfec) & 1;
|
||||
}
|
||||
fprintf (stderr, "average bitrate: %7.3f kb/s\n",
|
||||
1e-3*bits*sampling_rate/(frame_size*(double)count));
|
||||
1e-3*bits*sampling_rate/tot_samples);
|
||||
fprintf (stderr, "maximum bitrate: %7.3f kb/s\n",
|
||||
1e-3*bits_max*sampling_rate/frame_size);
|
||||
if (!decode_only)
|
||||
|
|
|
@ -67,6 +67,7 @@ struct OpusEncoder {
|
|||
opus_int32 Fs;
|
||||
int use_vbr;
|
||||
int vbr_constraint;
|
||||
int variable_duration;
|
||||
opus_int32 bitrate_bps;
|
||||
opus_int32 user_bitrate_bps;
|
||||
int lsb_depth;
|
||||
|
@ -89,7 +90,8 @@ struct OpusEncoder {
|
|||
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
|
||||
#ifndef FIXED_POINT
|
||||
TonalityAnalysisState analysis;
|
||||
int detected_bandwidth;
|
||||
int detected_bandwidth;
|
||||
int analysis_offset;
|
||||
#endif
|
||||
opus_uint32 rangeFinal;
|
||||
};
|
||||
|
@ -213,6 +215,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
|
|||
st->voice_ratio = -1;
|
||||
st->encoder_buffer = st->Fs/100;
|
||||
st->lsb_depth = 24;
|
||||
st->variable_duration = OPUS_FRAMESIZE_ARG;
|
||||
|
||||
/* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead
|
||||
+ 1.5 ms for SILK resamplers and stereo prediction) */
|
||||
|
@ -535,8 +538,258 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m
|
|||
return st->user_bitrate_bps;
|
||||
}
|
||||
|
||||
#ifndef FIXED_POINT
|
||||
/* Don't use more than 60 ms for the frame size analysis */
|
||||
#define MAX_DYNAMIC_FRAMESIZE 24
|
||||
/* Estimates how much the bitrate will be boosted based on the sub-frame energy */
|
||||
/* E:    sub-frame energies, first entry is the sub-frame preceding the frame.
   E_1:  per-sub-frame values paired with E (callers in this file pass the
         reciprocals of the energies).
   LM:   log2 of the candidate frame length, in sub-frames.
   maxM: upper bound on how many entries of E/E_1 may be read.
   Returns the square-root-shaped boost, limited by MIN16/MAX16 to 0..1. */
static float transient_boost(const float *E, const float *E_1, int LM, int maxM)
{
   float energy_sum = 0;
   float inv_energy_sum = 0;
   float metric;
   int count;
   int k;

   /* One entry more than the frame spans, but never past maxM */
   count = IMIN(maxM, (1<<LM)+1);
   k = 0;
   while (k < count)
   {
      energy_sum += E[k];
      inv_energy_sum += E_1[k];
      k++;
   }
   /* mean(E)*mean(E_1): with E_1 holding reciprocals this is 1 for a flat
      energy envelope and grows as the sub-frame energies diverge */
   metric = energy_sum*inv_energy_sum/(count*count);
   return MIN16(1,sqrt(MAX16(0,.05*(metric-2))));
}
|
||||
|
||||
/* Viterbi decoding trying to find the best frame size combination using look-ahead
|
||||
|
||||
State numbering:
|
||||
0: unused
|
||||
1: 2.5 ms
|
||||
2: 5 ms (#1)
|
||||
3: 5 ms (#2)
|
||||
4: 10 ms (#1)
|
||||
5: 10 ms (#2)
|
||||
6: 10 ms (#3)
|
||||
7: 10 ms (#4)
|
||||
8: 20 ms (#1)
|
||||
9: 20 ms (#2)
|
||||
10: 20 ms (#3)
|
||||
11: 20 ms (#4)
|
||||
12: 20 ms (#5)
|
||||
13: 20 ms (#6)
|
||||
14: 20 ms (#7)
|
||||
15: 20 ms (#8)
|
||||
*/
|
||||
static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate)
|
||||
{
|
||||
int i;
|
||||
float cost[MAX_DYNAMIC_FRAMESIZE][16];
|
||||
int states[MAX_DYNAMIC_FRAMESIZE][16];
|
||||
float best_cost;
|
||||
int best_state;
|
||||
|
||||
/* Makes variable framesize less aggressive at lower bitrates, but I can't
|
||||
find any valid theretical justification for this (other than it seems
|
||||
to help) */
|
||||
frame_cost *= 720/rate;
|
||||
for (i=0;i<16;i++)
|
||||
{
|
||||
/* Impossible state */
|
||||
states[0][i] = -1;
|
||||
cost[0][i] = 1e10;
|
||||
}
|
||||
for (i=0;i<4;i++)
|
||||
{
|
||||
cost[0][1<<i] = frame_cost + rate*(1<<i)*transient_boost(E, E_1, i, N+1);
|
||||
states[0][1<<i] = i;
|
||||
}
|
||||
for (i=1;i<N;i++)
|
||||
{
|
||||
int j;
|
||||
|
||||
/* Follow continuations */
|
||||
for (j=2;j<16;j++)
|
||||
{
|
||||
cost[i][j] = cost[i-1][j-1];
|
||||
states[i][j] = j-1;
|
||||
}
|
||||
|
||||
/* New frames */
|
||||
for(j=0;j<4;j++)
|
||||
{
|
||||
int k;
|
||||
float min_cost;
|
||||
float curr_cost;
|
||||
states[i][1<<j] = 1;
|
||||
min_cost = cost[i-1][1];
|
||||
for(k=1;k<4;k++)
|
||||
{
|
||||
float tmp = cost[i-1][(1<<(k+1))-1];
|
||||
if (tmp < min_cost)
|
||||
{
|
||||
states[i][1<<j] = (1<<(k+1))-1;
|
||||
min_cost = tmp;
|
||||
}
|
||||
}
|
||||
curr_cost = frame_cost+rate*(1<<j)*transient_boost(E+i, E_1+i, j, N-i+1);
|
||||
cost[i][1<<j] = min_cost;
|
||||
/* If part of the frame is outside the analysis window, only count part of the cost */
|
||||
if (N-i < (1<<j))
|
||||
cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j);
|
||||
else
|
||||
cost[i][1<<j] += curr_cost;
|
||||
}
|
||||
}
|
||||
|
||||
best_state=1;
|
||||
best_cost = cost[N-1][1];
|
||||
/* Find best end state (doesn't force a frame to end at N-1) */
|
||||
for (i=2;i<16;i++)
|
||||
{
|
||||
if (cost[N-1][i]<best_cost)
|
||||
{
|
||||
best_cost = cost[N-1][i];
|
||||
best_state = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* Follow transitions back */
|
||||
for (i=N-1;i>=0;i--)
|
||||
{
|
||||
/*printf("%d ", best_state);*/
|
||||
best_state = states[i][best_state];
|
||||
}
|
||||
/*printf("%d\n", best_state);*/
|
||||
return best_state;
|
||||
}
|
||||
|
||||
/* Sums the C interleaved float channels of _x into a mono signal.
   Writes subframe samples to sub, starting offset samples (per channel)
   into the input. The channels are added, not averaged. */
void downmix_float(const void *_x, float *sub, int subframe, int offset, int C)
{
   const float *pcm = (const float *)_x;
   int n;

   for (n=0;n<subframe;n++)
   {
      /* First channel of the interleaved sample group feeding output n */
      const float *frame = pcm + (n+offset)*C;
      int ch;
      sub[n] = frame[0];
      for (ch=1;ch<C;ch++)
         sub[n] += frame[ch];
   }
}
|
||||
|
||||
void downmix_int(const void *_x, float *sub, int subframe, int offset, int C)
|
||||
{
|
||||
const opus_int16 *x;
|
||||
int c, j;
|
||||
x = (const opus_int16 *)_x;
|
||||
for (j=0;j<subframe;j++)
|
||||
sub[j] = x[(j+offset)*C];
|
||||
for (c=1;c<C;c++)
|
||||
for (j=0;j<subframe;j++)
|
||||
sub[j] += x[(j+offset)*C+c];
|
||||
}
|
||||
|
||||
int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
|
||||
int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
|
||||
downmix_func downmix)
|
||||
{
|
||||
int N;
|
||||
int i;
|
||||
float e[MAX_DYNAMIC_FRAMESIZE+4];
|
||||
float e_1[MAX_DYNAMIC_FRAMESIZE+3];
|
||||
float memx;
|
||||
int bestLM=0;
|
||||
int subframe;
|
||||
int pos;
|
||||
VARDECL(opus_val16, sub);
|
||||
|
||||
subframe = Fs/400;
|
||||
ALLOC(sub, subframe, opus_val16);
|
||||
e[0]=mem[0];
|
||||
e_1[0]=1./(EPSILON+mem[0]);
|
||||
if (buffering)
|
||||
{
|
||||
/* Consider the CELT delay when not in restricted-lowdelay */
|
||||
/* We assume the buffering is between 2.5 and 5 ms */
|
||||
int offset = 2*subframe - buffering;
|
||||
celt_assert(offset>=0 && offset <= subframe);
|
||||
x += C*offset;
|
||||
len -= offset;
|
||||
e[1]=mem[1];
|
||||
e_1[1]=1./(EPSILON+mem[1]);
|
||||
e[2]=mem[2];
|
||||
e_1[2]=1./(EPSILON+mem[2]);
|
||||
pos = 3;
|
||||
} else {
|
||||
pos=1;
|
||||
}
|
||||
N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
|
||||
memx = x[0];
|
||||
for (i=0;i<N;i++)
|
||||
{
|
||||
float tmp;
|
||||
float tmpx;
|
||||
int j;
|
||||
tmp=EPSILON;
|
||||
|
||||
downmix(x, sub, subframe, i*subframe, C);
|
||||
if (i==0)
|
||||
memx = sub[0];
|
||||
for (j=0;j<subframe;j++)
|
||||
{
|
||||
tmpx = sub[j];
|
||||
tmp += (tmpx-memx)*(tmpx-memx);
|
||||
memx = tmpx;
|
||||
}
|
||||
e[i+pos] = tmp;
|
||||
e_1[i+pos] = 1.f/tmp;
|
||||
}
|
||||
/* Hack to get 20 ms working with APPLICATION_AUDIO
|
||||
The real problem is that the corresponding memory needs to use 1.5 ms
|
||||
from this frame and 1 ms from the next frame */
|
||||
e[i+pos] = e[i+pos-1];
|
||||
if (buffering)
|
||||
N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2);
|
||||
bestLM = transient_viterbi(e, e_1, N, (1.f+.5*tonality)*(40*C+40), bitrate/400);
|
||||
mem[0] = e[1<<bestLM];
|
||||
if (buffering)
|
||||
{
|
||||
mem[1] = e[(1<<bestLM)+1];
|
||||
mem[2] = e[(1<<bestLM)+2];
|
||||
}
|
||||
return bestLM;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Maps the caller-supplied frame_size and the expert frame duration setting
   to the number of samples that will actually be encoded.

   frame_size:        samples (per channel) offered by the caller.
   variable_duration: OPUS_FRAMESIZE_ARG to use frame_size as is,
                      OPUS_FRAMESIZE_VARIABLE to start from 20 ms, or one of
                      OPUS_FRAMESIZE_2_5_MS .. OPUS_FRAMESIZE_60_MS.
   Fs:                sampling rate.
   Returns the selected size, or -1 if the setting is unknown, the caller did
   not supply enough samples, or the result is not 2.5, 5, 10, 20, 40 or
   60 ms. */
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
{
   int new_size;
   if (frame_size<Fs/400)
      return -1;
   if (variable_duration == OPUS_FRAMESIZE_ARG)
      new_size = frame_size;
   else if (variable_duration == OPUS_FRAMESIZE_VARIABLE)
      new_size = Fs/50;
   else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS)
      /* Doubling from 2.5 ms would give 80 ms for the last setting, hence the 60 ms cap */
      new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS));
   else
      return -1;
   if (new_size>frame_size)
      return -1;
   /* Nothing longer than 60 ms is valid. Rejecting it here also keeps the
      multiplications below from overflowing when the caller passes a huge
      frame_size. */
   if (new_size>3*Fs/50)
      return -1;
   if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs &&
            50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs)
      return -1;
   return new_size;
}
|
||||
|
||||
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||
unsigned char *data, opus_int32 out_data_bytes, int lsb_depth)
|
||||
unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
|
||||
#ifndef FIXED_POINT
|
||||
, AnalysisInfo *analysis_info
|
||||
#endif
|
||||
)
|
||||
{
|
||||
void *silk_enc;
|
||||
CELTEncoder *celt_enc;
|
||||
|
@ -563,11 +816,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
int curr_bandwidth;
|
||||
opus_val16 HB_gain;
|
||||
opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
|
||||
int extra_buffer, total_buffer;
|
||||
int perform_analysis=0;
|
||||
#ifndef FIXED_POINT
|
||||
AnalysisInfo analysis_info;
|
||||
#endif
|
||||
int total_buffer;
|
||||
VARDECL(opus_val16, tmp_prefill);
|
||||
|
||||
ALLOC_STACK;
|
||||
|
@ -575,36 +824,37 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
max_data_bytes = IMIN(1276, out_data_bytes);
|
||||
|
||||
st->rangeFinal = 0;
|
||||
if (400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
|
||||
if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
|
||||
50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
return OPUS_BAD_ARG;
|
||||
}
|
||||
if (max_data_bytes<=0)
|
||||
|| (400*frame_size < st->Fs)
|
||||
|| max_data_bytes<=0
|
||||
)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
return OPUS_BAD_ARG;
|
||||
}
|
||||
silk_enc = (char*)st+st->silk_enc_offset;
|
||||
celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
|
||||
|
||||
lsb_depth = IMIN(lsb_depth, st->lsb_depth);
|
||||
|
||||
#ifndef FIXED_POINT
|
||||
perform_analysis = st->silk_mode.complexity >= 7 && frame_size >= st->Fs/100 && st->Fs==48000;
|
||||
#endif
|
||||
if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
|
||||
delay_compensation = 0;
|
||||
else
|
||||
delay_compensation = st->delay_compensation;
|
||||
if (perform_analysis)
|
||||
|
||||
lsb_depth = IMIN(lsb_depth, st->lsb_depth);
|
||||
|
||||
st->voice_ratio = -1;
|
||||
|
||||
#ifndef FIXED_POINT
|
||||
st->detected_bandwidth = 0;
|
||||
if (analysis_info->valid)
|
||||
{
|
||||
total_buffer = IMAX(st->Fs/200, delay_compensation);
|
||||
} else {
|
||||
total_buffer = delay_compensation;
|
||||
if (st->signal_type == OPUS_AUTO)
|
||||
st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob));
|
||||
st->detected_bandwidth = analysis_info->opus_bandwidth;
|
||||
}
|
||||
extra_buffer = total_buffer-delay_compensation;
|
||||
#endif
|
||||
|
||||
total_buffer = delay_compensation;
|
||||
st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
|
||||
|
||||
frame_rate = st->Fs/frame_size;
|
||||
|
@ -916,7 +1166,11 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
/* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
|
||||
if (to_celt && i==nb_frames-1)
|
||||
st->user_forced_mode = MODE_CELT_ONLY;
|
||||
tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth);
|
||||
tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth
|
||||
#ifndef FIXED_POINT
|
||||
, analysis_info
|
||||
#endif
|
||||
);
|
||||
if (tmp_len<0)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
|
@ -942,7 +1196,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
|
||||
curr_bandwidth = st->bandwidth;
|
||||
|
||||
/* Chooses the appropriate mode for speech
|
||||
|
@ -981,22 +1234,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
|
||||
}
|
||||
|
||||
#ifndef FIXED_POINT
|
||||
if (perform_analysis)
|
||||
{
|
||||
int nb_analysis_frames;
|
||||
nb_analysis_frames = frame_size/(st->Fs/100);
|
||||
for (i=0;i<nb_analysis_frames;i++)
|
||||
tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels, lsb_depth);
|
||||
if (st->signal_type == OPUS_AUTO)
|
||||
st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
|
||||
st->detected_bandwidth = analysis_info.opus_bandwidth;
|
||||
} else {
|
||||
analysis_info.valid = 0;
|
||||
st->voice_ratio = -1;
|
||||
st->detected_bandwidth = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* SILK processing */
|
||||
HB_gain = Q15ONE;
|
||||
|
@ -1205,9 +1443,18 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
} else {
|
||||
if (st->use_vbr)
|
||||
{
|
||||
opus_int32 bonus=0;
|
||||
#ifndef FIXED_POINT
|
||||
if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
|
||||
{
|
||||
bonus = (40*st->stream_channels+40)*(st->Fs/frame_size-50);
|
||||
if (analysis_info->valid)
|
||||
bonus = bonus*(1.f+.5*analysis_info->tonality);
|
||||
}
|
||||
#endif
|
||||
celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
|
||||
celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
|
||||
celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
|
||||
celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus));
|
||||
nb_compr_bytes = max_data_bytes-1-redundancy_bytes;
|
||||
} else {
|
||||
nb_compr_bytes = bytes_target;
|
||||
|
@ -1222,7 +1469,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
|
||||
{
|
||||
for (i=0;i<st->channels*st->Fs/400;i++)
|
||||
tmp_prefill[i] = st->delay_buffer[(extra_buffer+st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
|
||||
tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
|
||||
}
|
||||
|
||||
for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
|
||||
|
@ -1236,7 +1483,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
const CELTMode *celt_mode;
|
||||
|
||||
celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
|
||||
gain_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels,
|
||||
gain_fade(pcm_buf, pcm_buf,
|
||||
st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
|
||||
}
|
||||
st->prev_HB_gain = HB_gain;
|
||||
|
@ -1258,7 +1505,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
g1 *= (1.f/16384);
|
||||
g2 *= (1.f/16384);
|
||||
#endif
|
||||
stereo_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels, g1, g2, celt_mode->overlap,
|
||||
stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
|
||||
frame_size, st->channels, celt_mode->window, st->Fs);
|
||||
st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
|
||||
}
|
||||
|
@ -1312,7 +1559,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
int err;
|
||||
celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
|
||||
celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
|
||||
err = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
|
||||
err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
|
||||
if (err < 0)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
|
@ -1339,10 +1586,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
if (ec_tell(&enc) <= 8*nb_compr_bytes)
|
||||
{
|
||||
#ifndef FIXED_POINT
|
||||
if (perform_analysis)
|
||||
celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
|
||||
celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));
|
||||
#endif
|
||||
ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc);
|
||||
ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
|
||||
if (ret < 0)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
|
@ -1365,9 +1611,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
|
|||
celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
|
||||
|
||||
/* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
|
||||
celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2-N4), N4, dummy, 2, NULL);
|
||||
celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
|
||||
|
||||
err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
|
||||
err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
|
||||
if (err < 0)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
|
@ -1440,6 +1686,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
|
|||
VARDECL(opus_int16, in);
|
||||
ALLOC_STACK;
|
||||
|
||||
frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
|
||||
if(frame_size<0)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
|
@ -1459,6 +1706,12 @@ opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
|
|||
/* Encodes one frame of 16-bit PCM. The frame size is first resolved through
   frame_size_select() using the encoder's variable_duration setting;
   OPUS_BAD_ARG is returned if that fails, otherwise the result of
   opus_encode_native() with an LSB depth of 16. */
opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
                unsigned char *data, opus_int32 out_data_bytes)
{
   frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
   /* This function never calls ALLOC_STACK, so there is no saved stack
      state to restore on the error path */
   if(frame_size<0)
      return OPUS_BAD_ARG;
   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16);
}
|
||||
|
||||
|
@ -1467,21 +1720,74 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size,
|
|||
unsigned char *data, opus_int32 max_data_bytes)
|
||||
{
|
||||
int i, ret;
|
||||
const CELTMode *celt_mode;
|
||||
int delay_compensation;
|
||||
int lsb_depth;
|
||||
VARDECL(float, in);
|
||||
AnalysisInfo analysis_info;
|
||||
ALLOC_STACK;
|
||||
|
||||
opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
|
||||
if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
|
||||
delay_compensation = 0;
|
||||
else
|
||||
delay_compensation = st->delay_compensation;
|
||||
|
||||
lsb_depth = IMIN(16, st->lsb_depth);
|
||||
|
||||
analysis_info.valid = 0;
|
||||
if (st->silk_mode.complexity >= 7 && st->Fs==48000)
|
||||
{
|
||||
frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
|
||||
frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info);
|
||||
} else {
|
||||
frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
|
||||
}
|
||||
if(frame_size<0)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
return OPUS_BAD_ARG;
|
||||
}
|
||||
|
||||
ALLOC(in, frame_size*st->channels, float);
|
||||
|
||||
for (i=0;i<frame_size*st->channels;i++)
|
||||
in[i] = (1.0f/32768)*pcm[i];
|
||||
ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16);
|
||||
ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info);
|
||||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size,
|
||||
unsigned char *data, opus_int32 out_data_bytes)
|
||||
{
|
||||
return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24);
|
||||
const CELTMode *celt_mode;
|
||||
int delay_compensation;
|
||||
int lsb_depth;
|
||||
AnalysisInfo analysis_info;
|
||||
|
||||
opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode));
|
||||
if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
|
||||
delay_compensation = 0;
|
||||
else
|
||||
delay_compensation = st->delay_compensation;
|
||||
|
||||
lsb_depth = IMIN(24, st->lsb_depth);
|
||||
|
||||
analysis_info.valid = 0;
|
||||
if (st->silk_mode.complexity >= 7 && st->Fs==48000)
|
||||
{
|
||||
frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset,
|
||||
frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info);
|
||||
} else {
|
||||
frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs);
|
||||
}
|
||||
if(frame_size<0)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
return OPUS_BAD_ARG;
|
||||
}
|
||||
|
||||
return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -1750,6 +2056,18 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
|
|||
*value = st->lsb_depth;
|
||||
}
|
||||
break;
|
||||
case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
|
||||
{
|
||||
opus_int32 value = va_arg(ap, opus_int32);
|
||||
st->variable_duration = value;
|
||||
}
|
||||
break;
|
||||
case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
|
||||
{
|
||||
opus_int32 *value = va_arg(ap, opus_int32*);
|
||||
*value = st->variable_duration;
|
||||
}
|
||||
break;
|
||||
case OPUS_RESET_STATE:
|
||||
{
|
||||
void *silk_enc;
|
||||
|
@ -1779,6 +2097,15 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
|
|||
st->user_forced_mode = value;
|
||||
}
|
||||
break;
|
||||
|
||||
case CELT_GET_MODE_REQUEST:
|
||||
{
|
||||
const CELTMode ** value = va_arg(ap, const CELTMode**);
|
||||
if (value==0)
|
||||
goto bad_arg;
|
||||
celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/
|
||||
ret = OPUS_UNIMPLEMENTED;
|
||||
|
|
|
@ -36,10 +36,14 @@
|
|||
#include <stdarg.h>
|
||||
#include "float_cast.h"
|
||||
#include "os_support.h"
|
||||
#include "analysis.h"
|
||||
|
||||
struct OpusMSEncoder {
|
||||
TonalityAnalysisState analysis;
|
||||
ChannelLayout layout;
|
||||
int bitrate;
|
||||
int variable_duration;
|
||||
opus_int32 bitrate_bps;
|
||||
opus_val32 subframe_mem[3];
|
||||
/* Encoder states go here */
|
||||
};
|
||||
|
||||
|
@ -102,6 +106,8 @@ int opus_multistream_encoder_init(
|
|||
st->layout.nb_streams = streams;
|
||||
st->layout.nb_coupled_streams = coupled_streams;
|
||||
|
||||
st->bitrate_bps = OPUS_AUTO;
|
||||
st->variable_duration = OPUS_FRAMESIZE_ARG;
|
||||
for (i=0;i<st->layout.nb_channels;i++)
|
||||
st->layout.mapping[i] = mapping[i];
|
||||
if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout))
|
||||
|
@ -182,6 +188,10 @@ static int opus_multistream_encode_native
|
|||
unsigned char *data,
|
||||
opus_int32 max_data_bytes,
|
||||
int lsb_depth
|
||||
#ifndef FIXED_POINT
|
||||
, downmix_func downmix
|
||||
, const void *pcm_analysis
|
||||
#endif
|
||||
)
|
||||
{
|
||||
opus_int32 Fs;
|
||||
|
@ -193,10 +203,43 @@ static int opus_multistream_encode_native
|
|||
VARDECL(opus_val16, buf);
|
||||
unsigned char tmp_data[MS_FRAME_TMP];
|
||||
OpusRepacketizer rp;
|
||||
int orig_frame_size;
|
||||
int coded_channels;
|
||||
opus_int32 channel_rate;
|
||||
opus_int32 complexity;
|
||||
AnalysisInfo analysis_info;
|
||||
const CELTMode *celt_mode;
|
||||
ALLOC_STACK;
|
||||
|
||||
ptr = (char*)st + align(sizeof(OpusMSEncoder));
|
||||
opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
|
||||
opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity));
|
||||
opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode));
|
||||
|
||||
if (400*frame_size < Fs)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
return OPUS_BAD_ARG;
|
||||
}
|
||||
orig_frame_size = IMIN(frame_size,Fs/50);
|
||||
#ifndef FIXED_POINT
|
||||
analysis_info.valid = 0;
|
||||
if (complexity >= 7 && Fs==48000)
|
||||
{
|
||||
opus_int32 delay_compensation;
|
||||
int channels;
|
||||
|
||||
channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
|
||||
opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation));
|
||||
delay_compensation -= Fs/400;
|
||||
|
||||
frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis,
|
||||
frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
frame_size = frame_size_select(frame_size, st->variable_duration, Fs);
|
||||
}
|
||||
/* Validate frame_size before using it to allocate stack space.
|
||||
This mirrors the checks in opus_encode[_float](). */
|
||||
if (400*frame_size != Fs && 200*frame_size != Fs &&
|
||||
|
@ -215,6 +258,39 @@ static int opus_multistream_encode_native
|
|||
RESTORE_STACK;
|
||||
return OPUS_BUFFER_TOO_SMALL;
|
||||
}
|
||||
|
||||
/* Compute bitrate allocation between streams (this could be a lot better) */
|
||||
coded_channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
|
||||
if (st->bitrate_bps==OPUS_AUTO)
|
||||
{
|
||||
channel_rate = Fs+60*Fs/orig_frame_size;
|
||||
} else if (st->bitrate_bps==OPUS_BITRATE_MAX)
|
||||
{
|
||||
channel_rate = 300000;
|
||||
} else {
|
||||
channel_rate = st->bitrate_bps/coded_channels;
|
||||
}
|
||||
#ifndef FIXED_POINT
|
||||
if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50)
|
||||
{
|
||||
opus_int32 bonus;
|
||||
bonus = 60*(Fs/frame_size-50);
|
||||
channel_rate += bonus;
|
||||
}
|
||||
#endif
|
||||
ptr = (char*)st + align(sizeof(OpusMSEncoder));
|
||||
for (s=0;s<st->layout.nb_streams;s++)
|
||||
{
|
||||
OpusEncoder *enc;
|
||||
enc = (OpusEncoder*)ptr;
|
||||
if (s < st->layout.nb_coupled_streams)
|
||||
ptr += align(coupled_size);
|
||||
else
|
||||
ptr += align(mono_size);
|
||||
opus_encoder_ctl(enc, OPUS_SET_BITRATE(channel_rate * (s < st->layout.nb_coupled_streams ? 2 : 1)));
|
||||
}
|
||||
|
||||
ptr = (char*)st + align(sizeof(OpusMSEncoder));
|
||||
/* Counting ToC */
|
||||
tot_size = 0;
|
||||
for (s=0;s<st->layout.nb_streams;s++)
|
||||
|
@ -246,7 +322,11 @@ static int opus_multistream_encode_native
|
|||
/* Reserve three bytes for the last stream and four for the others */
|
||||
curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1);
|
||||
curr_max = IMIN(curr_max,MS_FRAME_TMP);
|
||||
len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth);
|
||||
len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth
|
||||
#ifndef FIXED_POINT
|
||||
, &analysis_info
|
||||
#endif
|
||||
);
|
||||
if (len<0)
|
||||
{
|
||||
RESTORE_STACK;
|
||||
|
@ -345,8 +425,9 @@ int opus_multistream_encode_float
|
|||
opus_int32 max_data_bytes
|
||||
)
|
||||
{
|
||||
int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
|
||||
return opus_multistream_encode_native(st, opus_copy_channel_in_float,
|
||||
pcm, frame_size, data, max_data_bytes, 24);
|
||||
pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset);
|
||||
}
|
||||
|
||||
int opus_multistream_encode(
|
||||
|
@ -357,8 +438,9 @@ int opus_multistream_encode(
|
|||
opus_int32 max_data_bytes
|
||||
)
|
||||
{
|
||||
int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
|
||||
return opus_multistream_encode_native(st, opus_copy_channel_in_short,
|
||||
pcm, frame_size, data, max_data_bytes, 16);
|
||||
pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -378,20 +460,10 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
|
|||
{
|
||||
case OPUS_SET_BITRATE_REQUEST:
|
||||
{
|
||||
int chan, s;
|
||||
opus_int32 value = va_arg(ap, opus_int32);
|
||||
chan = st->layout.nb_streams + st->layout.nb_coupled_streams;
|
||||
value /= chan;
|
||||
for (s=0;s<st->layout.nb_streams;s++)
|
||||
{
|
||||
OpusEncoder *enc;
|
||||
enc = (OpusEncoder*)ptr;
|
||||
if (s < st->layout.nb_coupled_streams)
|
||||
ptr += align(coupled_size);
|
||||
else
|
||||
ptr += align(mono_size);
|
||||
opus_encoder_ctl(enc, request, value * (s < st->layout.nb_coupled_streams ? 2 : 1));
|
||||
}
|
||||
if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX)
|
||||
goto bad_arg;
|
||||
st->bitrate_bps = value;
|
||||
}
|
||||
break;
|
||||
case OPUS_GET_BITRATE_REQUEST:
|
||||
|
@ -504,7 +576,21 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
|
|||
}
|
||||
*value = (OpusEncoder*)ptr;
|
||||
}
|
||||
break;
|
||||
break;
|
||||
case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
|
||||
{
|
||||
opus_int32 value = va_arg(ap, opus_int32);
|
||||
if (value<0 || value>1)
|
||||
goto bad_arg;
|
||||
st->variable_duration = value;
|
||||
}
|
||||
break;
|
||||
case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
|
||||
{
|
||||
opus_int32 *value = va_arg(ap, opus_int32*);
|
||||
*value = st->variable_duration;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ret = OPUS_UNIMPLEMENTED;
|
||||
break;
|
||||
|
@ -512,6 +598,9 @@ int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...)
|
|||
|
||||
va_end(ap);
|
||||
return ret;
|
||||
bad_arg:
|
||||
va_end(ap);
|
||||
return OPUS_BAD_ARG;
|
||||
}
|
||||
|
||||
void opus_multistream_encoder_destroy(OpusMSEncoder *st)
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include "arch.h"
|
||||
#include "opus.h"
|
||||
#include "celt.h"
|
||||
|
||||
struct OpusRepacketizer {
|
||||
unsigned char toc;
|
||||
|
@ -81,11 +82,24 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
|
|||
#define OPUS_SET_FORCE_MODE_REQUEST 11002
|
||||
#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
|
||||
|
||||
/* Pointer type for a downmix routine: returns void and takes
   (const void *, float *, int, int, int). downmix_float() and downmix_int()
   are declared with this signature, and optimize_framesize() accepts one as
   its last parameter. */
typedef void (*downmix_func)(const void *, float *, int, int, int);
|
||||
/* Downmix routine with the downmix_func signature.
   opus_multistream_encode_float() passes it to
   opus_multistream_encode_native().
   NOTE(review): presumably _x is float PCM with C interleaved channels, and
   `subframe` samples starting at `offset` are mixed into sub -- confirm
   against the definition. */
void downmix_float(const void *_x, float *sub, int subframe, int offset, int C);
|
||||
/* Downmix routine with the downmix_func signature.
   opus_multistream_encode() passes it to opus_multistream_encode_native().
   NOTE(review): presumably _x is 16-bit integer PCM with C interleaved
   channels, and `subframe` samples starting at `offset` are mixed into sub --
   confirm against the definition. */
void downmix_int(const void *_x, float *sub, int subframe, int offset, int C);
|
||||
|
||||
/* Declaration only; the definition is not in this header.
   Takes a block of input (x, len, C channels, sampling rate Fs), a bitrate,
   a tonality value, a caller-provided opus_val32 state array (mem), a
   buffering amount and a downmix callback, and returns an int.
   NOTE(review): presumably the return value encodes the frame size chosen for
   variable-duration encoding -- confirm against the definition. */
int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
|
||||
int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,
|
||||
downmix_func downmix);
|
||||
|
||||
/* Declaration only; the definition is not in this header.
   NOTE(review): presumably serializes `size` into the buffer at `data` and
   returns the number of bytes written -- confirm against the definition. */
int encode_size(int size, unsigned char *data);
|
||||
|
||||
/* Declaration only; the definition is not in this header.
   Takes the caller's frame_size, a variable_duration setting and the sampling
   rate Fs, and returns an opus_int32.
   NOTE(review): presumably returns the frame size to actually encode (or a
   negative value when the combination is invalid) -- confirm against the
   definition. */
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs);
|
||||
|
||||
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
|
||||
unsigned char *data, opus_int32 out_data_bytes, int lsb_depth);
|
||||
unsigned char *data, opus_int32 out_data_bytes, int lsb_depth
|
||||
#ifndef FIXED_POINT
|
||||
, AnalysisInfo *analysis_info
|
||||
#endif
|
||||
);
|
||||
|
||||
int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
|
||||
opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue