First pass at making the analysis code run with FIXED_POINT

Code is still float, but at least tonality estimation seems to work.
Speech/music analysis is still disabled.
This commit is contained in:
Jean-Marc Valin 2013-09-06 16:00:39 -04:00
parent 91904a4c91
commit 3ab03e0556
10 changed files with 70 additions and 41 deletions

View file

@ -20,9 +20,10 @@ if FIXED_POINT
SILK_SOURCES += $(SILK_SOURCES_FIXED) SILK_SOURCES += $(SILK_SOURCES_FIXED)
else else
SILK_SOURCES += $(SILK_SOURCES_FLOAT) SILK_SOURCES += $(SILK_SOURCES_FLOAT)
OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
endif endif
OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
if CPU_ARM if CPU_ARM
CELT_SOURCES += $(CELT_SOURCES_ARM) CELT_SOURCES += $(CELT_SOURCES_ARM)
endif endif

View file

@ -185,6 +185,7 @@ typedef float celt_ener;
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
#define MULT16_16_Q11_32(a,b) ((a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b))
#define MULT16_16_Q11(a,b) ((a)*(b))
#define MULT16_16_Q13(a,b) ((a)*(b)) #define MULT16_16_Q13(a,b) ((a)*(b))
#define MULT16_16_Q14(a,b) ((a)*(b)) #define MULT16_16_Q14(a,b) ((a)*(b))
#define MULT16_16_Q15(a,b) ((a)*(b)) #define MULT16_16_Q15(a,b) ((a)*(b))

View file

@ -52,11 +52,11 @@ extern "C" {
typedef struct { typedef struct {
int valid; int valid;
opus_val16 tonality; float tonality;
opus_val16 tonality_slope; float tonality_slope;
opus_val16 noisiness; float noisiness;
opus_val16 activity; float activity;
opus_val16 music_prob; float music_prob;
int bandwidth; int bandwidth;
}AnalysisInfo; }AnalysisInfo;

View file

@ -819,7 +819,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= SHR16(surround_trim, DB_SHIFT-8);
trim -= 2*SHR16(tf_estimate, 14-8); trim -= 2*SHR16(tf_estimate, 14-8);
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
if (analysis->valid) if (analysis->valid)
{ {
trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f))); trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f)));
@ -1142,7 +1142,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
target = base_target; target = base_target;
/*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/ /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
if (analysis->valid && analysis->activity<.4) if (analysis->valid && analysis->activity<.4)
target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity)); target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity));
#endif #endif
@ -1167,7 +1167,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
QCONST16(0.02f,14) : QCONST16(0.04f,14); QCONST16(0.02f,14) : QCONST16(0.04f,14);
target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1); target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1);
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
/* Apply tonality boost */ /* Apply tonality boost */
if (analysis->valid && !lfe) if (analysis->valid && !lfe)
{ {
@ -1872,7 +1872,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0; anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
bits -= anti_collapse_rsv; bits -= anti_collapse_rsv;
signalBandwidth = st->end-1; signalBandwidth = st->end-1;
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
if (st->analysis.valid) if (st->analysis.valid)
{ {
int min_bandwidth; int min_bandwidth;

View file

@ -116,6 +116,7 @@
#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) #define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) #define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))

View file

@ -253,10 +253,10 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
for (i=0;i<N2;i++) for (i=0;i<N2;i++)
{ {
float w = analysis_window[i]; float w = analysis_window[i];
in[i].r = MULT16_16(w, tonal->inmem[i]); in[i].r = w*tonal->inmem[i];
in[i].i = MULT16_16(w, tonal->inmem[N2+i]); in[i].i = w*tonal->inmem[N2+i];
in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]); in[N-i-1].r = w*tonal->inmem[N-i-1];
in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]); in[N-i-1].i = w*tonal->inmem[N+N2-i-1];
} }
OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
@ -325,8 +325,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
float stationarity; float stationarity;
for (i=tbands[b];i<tbands[b+1];i++) for (i=tbands[b];i<tbands[b+1];i++)
{ {
float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+ out[i].i*out[i].i + out[N-i].i*out[N-i].i; + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
E += binE; E += binE;
tE += binE*tonality[i]; tE += binE*tonality[i];
nE += binE*2.f*(.5f-noisiness[i]); nE += binE*2.f*(.5f-noisiness[i]);
@ -334,7 +334,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
tonal->E[tonal->E_count][b] = E; tonal->E[tonal->E_count][b] = E;
frame_noisiness += nE/(1e-15f+E); frame_noisiness += nE/(1e-15f+E);
frame_loudness += celt_sqrt(E+1e-10f); frame_loudness += sqrt(E+1e-10f);
logE[b] = (float)log(E+1e-10f); logE[b] = (float)log(E+1e-10f);
tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
@ -348,11 +348,11 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
L1=L2=0; L1=L2=0;
for (i=0;i<NB_FRAMES;i++) for (i=0;i<NB_FRAMES;i++)
{ {
L1 += celt_sqrt(tonal->E[i][b]); L1 += sqrt(tonal->E[i][b]);
L2 += tonal->E[i][b]; L2 += tonal->E[i][b];
} }
stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2)); stationarity = MIN16(0.99f,L1/sqrt(EPSILON+NB_FRAMES*L2));
stationarity *= stationarity; stationarity *= stationarity;
stationarity *= stationarity; stationarity *= stationarity;
frame_stationarity += stationarity; frame_stationarity += stationarity;
@ -379,6 +379,9 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
bandwidth = 0; bandwidth = 0;
maxE = 0; maxE = 0;
noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8))); noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
#ifdef FIXED_POINT
noise_floor *= 1<<(15+SIG_SHIFT);
#endif
noise_floor *= noise_floor; noise_floor *= noise_floor;
for (b=0;b<NB_TOT_BANDS;b++) for (b=0;b<NB_TOT_BANDS;b++)
{ {
@ -389,8 +392,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
band_end = extra_bands[b+1]; band_end = extra_bands[b+1];
for (i=band_start;i<band_end;i++) for (i=band_start;i<band_end;i++)
{ {
float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+ out[i].i*out[i].i + out[N-i].i*out[N-i].i; + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
E += binE; E += binE;
} }
maxE = MAX32(maxE, E); maxE = MAX32(maxE, E);
@ -469,7 +472,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
tonal->mem[i] = BFCC[i]; tonal->mem[i] = BFCC[i];
} }
for (i=0;i<9;i++) for (i=0;i<9;i++)
features[11+i] = celt_sqrt(tonal->std[i]); features[11+i] = sqrt(tonal->std[i]);
features[20] = info->tonality; features[20] = info->tonality;
features[21] = info->activity; features[21] = info->activity;
features[22] = frame_stationarity; features[22] = frame_stationarity;

View file

@ -42,7 +42,7 @@ typedef struct {
float angle[240]; float angle[240];
float d_angle[240]; float d_angle[240];
float d2_angle[240]; float d2_angle[240];
float inmem[ANALYSIS_BUF_SIZE]; opus_val32 inmem[ANALYSIS_BUF_SIZE];
int mem_fill; /* number of usable samples in the buffer */ int mem_fill; /* number of usable samples in the buffer */
float prev_band_tonality[NB_TBANDS]; float prev_band_tonality[NB_TBANDS];
float prev_tonality; float prev_tonality;

View file

@ -43,9 +43,9 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
/*double x, y;*/ /*double x, y;*/
opus_val16 dy, yy; /* Q14 */ opus_val16 dy, yy; /* Q14 */
/*x = 1.9073e-06*_x;*/ /*x = 1.9073e-06*_x;*/
if (_x>=QCONST32(10,19)) if (_x>=QCONST32(8,19))
return QCONST32(1.,14); return QCONST32(1.,14);
if (_x<=-QCONST32(10,19)) if (_x<=-QCONST32(8,19))
return -QCONST32(1.,14); return -QCONST32(1.,14);
xx = EXTRACT16(SHR32(_x, 8)); xx = EXTRACT16(SHR32(_x, 8));
/*i = lrint(25*x);*/ /*i = lrint(25*x);*/

View file

@ -98,7 +98,7 @@ struct OpusEncoder {
int energy_masking; int energy_masking;
StereoWidthState width_mem; StereoWidthState width_mem;
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
TonalityAnalysisState analysis; TonalityAnalysisState analysis;
int detected_bandwidth; int detected_bandwidth;
int analysis_offset; int analysis_offset;
@ -551,7 +551,7 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m
return st->user_bitrate_bps; return st->user_bitrate_bps;
} }
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
/* Don't use more than 60 ms for the frame size analysis */ /* Don't use more than 60 ms for the frame size analysis */
#define MAX_DYNAMIC_FRAMESIZE 24 #define MAX_DYNAMIC_FRAMESIZE 24
/* Estimates how much the bitrate will be boosted based on the sub-frame energy */ /* Estimates how much the bitrate will be boosted based on the sub-frame energy */
@ -697,10 +697,10 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
int bestLM=0; int bestLM=0;
int subframe; int subframe;
int pos; int pos;
VARDECL(opus_val16, sub); VARDECL(opus_val32, sub);
subframe = Fs/400; subframe = Fs/400;
ALLOC(sub, subframe, opus_val16); ALLOC(sub, subframe, opus_val32);
e[0]=mem[0]; e[0]=mem[0];
e_1[0]=1.f/(EPSILON+mem[0]); e_1[0]=1.f/(EPSILON+mem[0]);
if (buffering) if (buffering)
@ -759,30 +759,41 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
#endif #endif
#ifndef DISABLE_FLOAT_API #ifndef DISABLE_FLOAT_API
void downmix_float(const void *_x, float *sub, int subframe, int offset, int c1, int c2, int C) void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
{ {
const float *x; const float *x;
int j; int j;
x = (const float *)_x; x = (const float *)_x;
for (j=0;j<subframe;j++) for (j=0;j<subframe;j++)
sub[j] = x[(j+offset)*C+c1]; sub[j] = SCALEIN(x[(j+offset)*C+c1]);
if (c2>-1) if (c2>-1)
{ {
for (j=0;j<subframe;j++) for (j=0;j<subframe;j++)
sub[j] += x[(j+offset)*C+c2]; sub[j] += SCALEIN(x[(j+offset)*C+c2]);
} else if (c2==-2) } else if (c2==-2)
{ {
int c; int c;
for (c=1;c<C;c++) for (c=1;c<C;c++)
{ {
for (j=0;j<subframe;j++) for (j=0;j<subframe;j++)
sub[j] += x[(j+offset)*C+c]; sub[j] += SCALEIN(x[(j+offset)*C+c]);
} }
} }
#ifdef FIXED_POINT
{
opus_val32 scale =(1<<SIG_SHIFT);
if (C==-2)
scale /= C;
else
scale /= 2;
for (j=0;j<subframe;j++)
sub[j] *= scale;
}
#endif
} }
#endif #endif
void downmix_int(const void *_x, float *sub, int subframe, int offset, int c1, int c2, int C) void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
{ {
const opus_int16 *x; const opus_int16 *x;
int j; int j;
@ -802,8 +813,20 @@ void downmix_int(const void *_x, float *sub, int subframe, int offset, int c1, i
sub[j] += x[(j+offset)*C+c]; sub[j] += x[(j+offset)*C+c];
} }
} }
#ifdef FIXED_POINT
{
opus_val32 scale =(1<<SIG_SHIFT);
if (C==-2)
scale /= C;
else
scale /= 2;
for (j=0;j<subframe;j++)
sub[j] *= scale;
}
#else
for (j=0;j<subframe;j++) for (j=0;j<subframe;j++)
sub[j] *= (1.f/32768); sub[j] *= (1.f/32768);
#endif
} }
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
@ -964,7 +987,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
analysis_info.valid = 0; analysis_info.valid = 0;
celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
if (st->silk_mode.complexity >= 7 && st->Fs==48000) if (st->silk_mode.complexity >= 7 && st->Fs==48000)
{ {
frame_size = run_analysis(&st->analysis, celt_mode, pcm, analysis_pcm, frame_size = run_analysis(&st->analysis, celt_mode, pcm, analysis_pcm,
@ -982,7 +1005,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
st->voice_ratio = -1; st->voice_ratio = -1;
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
st->detected_bandwidth = 0; st->detected_bandwidth = 0;
if (analysis_info.valid) if (analysis_info.valid)
{ {
@ -1624,7 +1647,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->use_vbr) if (st->use_vbr)
{ {
opus_int32 bonus=0; opus_int32 bonus=0;
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
{ {
bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50);
@ -1726,7 +1749,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
ec_enc_shrink(&enc, nb_compr_bytes); ec_enc_shrink(&enc, nb_compr_bytes);
} }
#ifndef FIXED_POINT #ifndef DISABLE_FLOAT_API
if (redundancy || st->mode != MODE_SILK_ONLY) if (redundancy || st->mode != MODE_SILK_ONLY)
celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
#endif #endif

View file

@ -82,9 +82,9 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
#define OPUS_SET_FORCE_MODE_REQUEST 11002 #define OPUS_SET_FORCE_MODE_REQUEST 11002
#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x) #define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
typedef void (*downmix_func)(const void *, float *, int, int, int, int, int); typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
void downmix_float(const void *_x, float *sub, int subframe, int offset, int c1, int c2, int C); void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
void downmix_int(const void *_x, float *sub, int subframe, int offset, int c1, int c2, int C); void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs,
int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering, int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering,