Tonality estimation code

This commit is contained in:
Jean-Marc Valin 2008-10-15 07:29:58 -04:00
parent 01e177377c
commit dffd9449b5
5 changed files with 63 additions and 7 deletions

View file

@ -471,8 +471,16 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
}
/* Pitch analysis: we do it early to save on the peak stack space */
if (st->pitch_enabled && !shortBlocks)
find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
{
#ifdef EXP_PSY
VARDECL(celt_word16_t, X);
ALLOC(X, MAX_PERIOD/2, celt_word16_t);
find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, X, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
compute_tonality(st->mode, X, st->psy_mem, MAX_PERIOD);
#else
find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, NULL, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
#endif
}
ALLOC(freq, C*N, celt_sig_t); /**< Interleaved signal MDCTs */
/*for (i=0;i<(B+1)*C*N;i++) printf ("%f(%d) ", in[i], i); printf ("\n");*/
@ -480,13 +488,13 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
compute_mdcts(st->mode, shortBlocks, in, freq);
#ifdef EXP_PSY
CELT_MOVE(st->psy_mem, st->out_mem+N, MAX_PERIOD+st->overlap-N);
/*CELT_MOVE(st->psy_mem, st->out_mem+N, MAX_PERIOD+st->overlap-N);
for (i=0;i<N;i++)
st->psy_mem[MAX_PERIOD+st->overlap-N+i] = in[C*(st->overlap+i)];
for (c=1;c<C;c++)
for (i=0;i<N;i++)
st->psy_mem[MAX_PERIOD+st->overlap-N+i] += in[C*(st->overlap+i)+c];
*/
ALLOC(mask, N, celt_sig_t);
compute_mdct_masking(&st->psy, freq, st->psy_mem, mask, C*N);
@ -846,7 +854,7 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16_t * restrict
compute_mdcts(st->mode, st->mode->window, st->out_mem+pitch_index*C, freq);
#else
find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, st->out_mem+MAX_PERIOD-len, st->out_mem, st->mode->window, len, MAX_PERIOD-len-100, &pitch_index);
find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, st->out_mem+MAX_PERIOD-len, st->out_mem, st->mode->window, NULL, len, MAX_PERIOD-len-100, &pitch_index);
pitch_index = MAX_PERIOD-len-pitch_index;
offset = MAX_PERIOD-pitch_index;
while (offset+len >= MAX_PERIOD)

View file

@ -104,7 +104,7 @@ static void normalise16(celt_word16_t *x, int len, celt_word16_t val)
#define INPUT_SHIFT 15
void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyDecay *decay, const celt_sig_t * restrict x, const celt_sig_t * restrict y, const celt_word16_t * restrict window, int len, int max_pitch, int *pitch)
void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyDecay *decay, const celt_sig_t * restrict x, const celt_sig_t * restrict y, const celt_word16_t * restrict window, celt_word16_t * restrict spectrum, int len, int max_pitch, int *pitch)
{
int c, i;
VARDECL(celt_word16_t, _X);
@ -158,6 +158,14 @@ void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyD
/* Forward real FFT (in-place) */
real16_fft_inplace(fft, X, lag);
if (spectrum)
{
for (i=0;i<lag/4;i++)
{
spectrum[2*i] = X[4*i];
spectrum[2*i+1] = X[4*i+1];
}
}
#ifndef SHORTCUTS
compute_masking(decay, X, curve, lag);
#endif

View file

@ -48,6 +48,6 @@ void pitch_state_free(kiss_fftr_cfg st);
/** Find the optimal delay for the pitch prediction. Computation is
done in the frequency domain, both to save time and to make it
easier to apply psychoacoustic weighting */
void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyDecay *decay, const celt_sig_t *x, const celt_sig_t *y, const celt_word16_t *window, int len, int max_pitch, int *pitch);
void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyDecay *decay, const celt_sig_t *x, const celt_sig_t *y, const celt_word16_t *window, celt_word16_t * restrict X, int len, int max_pitch, int *pitch);
#endif

View file

@ -38,6 +38,7 @@
#include "os_support.h"
#include "arch.h"
#include "stack_alloc.h"
#include "mathops.h"
/* The Vorbis freq<->Bark mapping */
#define toBARK(n) (13.1f*atan(.00074f*(n))+2.24f*atan((n)*(n)*1.85e-8f)+1e-4f*(n))
@ -162,4 +163,40 @@ void compute_mdct_masking(const struct PsyDecay *decay, celt_word32_t *X, celt_w
spreading_func(decay, mask, len);
RESTORE_STACK;
}
void compute_tonality(const CELTMode *m, celt_word16_t * restrict X, celt_word16_t * mem, int len)
{
int i;
celt_word16_t norm_1;
celt_word16_t *mem2;
int N = len>>2;
mem2 = mem+2*N;
X[0] = 0;
X[1] = 0;
for (i=1;i<N;i++)
{
celt_word16_t re, im, re2, im2;
re = X[2*i];
im = X[2*i+1];
/* Normalise spectrum */
norm_1 = celt_rsqrt(MAC16_16(MULT16_16(re,re), im,im));
re = MULT16_16(re, norm_1);
im = MULT16_16(im, norm_1);
/* Phase derivative */
re2 = re*mem[2*i] + im*mem[2*i+1];
im2 = im*mem[2*i] - re*mem[2*i+1];
mem[2*i] = re;
mem[2*i+1] = im;
/* Phase second derivative */
re = re2*mem2[2*i] + im2*mem2[2*i+1];
im = im2*mem2[2*i] - re2*mem2[2*i+1];
mem2[2*i] = re2;
mem2[2*i+1] = im2;
/*printf ("%f ", re);*/
X[2*i] = re;
X[2*i+1] = im;
}
/*printf ("\n");*/
}
#endif

View file

@ -32,6 +32,7 @@
#define PSY_H
#include "arch.h"
#include "celt.h"
struct PsyDecay {
/*celt_word16_t *decayL;*/
@ -50,4 +51,6 @@ void compute_masking(const struct PsyDecay *decay, celt_word16_t *X, celt_mask_t
/** Compute the masking curve for an input (MDCT) spectrum X */
void compute_mdct_masking(const struct PsyDecay *decay, celt_word32_t *X, celt_word16_t *long_window, celt_mask_t *mask, int len);
void compute_tonality(const CELTMode *m, celt_word16_t * restrict X, celt_word16_t * mem, int len);
#endif /* PSY_H */