Expose the normalized range for reciprocal square roots in fixed-point mode. This allows subsequnt calculations to use the full precision of the result.
This commit is contained in:
parent
630ee44aaa
commit
8c7bb4c9c7
3 changed files with 42 additions and 20 deletions
|
@ -106,6 +106,7 @@ static inline celt_int16 bitexact_cos(celt_int16 x)
|
|||
#define celt_sqrt(x) ((float)sqrt(x))
|
||||
#define celt_psqrt(x) ((float)sqrt(x))
|
||||
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
|
||||
#define celt_rsqrt_norm(x) (celt_rsqrt(x))
|
||||
#define celt_acos acos
|
||||
#define celt_exp exp
|
||||
#define celt_cos_norm(x) (cos((.5f*M_PI)*(x)))
|
||||
|
@ -186,17 +187,13 @@ static inline celt_int16 celt_zlog2(celt_word32 x)
|
|||
return x <= 0 ? 0 : celt_ilog2(x);
|
||||
}
|
||||
|
||||
/** Reciprocal sqrt approximation (Q30 input, Q0 output or equivalent) */
|
||||
static inline celt_word32 celt_rsqrt(celt_word32 x)
|
||||
/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
|
||||
static inline celt_word16 celt_rsqrt_norm(celt_word32 x)
|
||||
{
|
||||
int k;
|
||||
celt_word16 n;
|
||||
celt_word16 r;
|
||||
celt_word16 r2;
|
||||
celt_word16 y;
|
||||
celt_word32 rt;
|
||||
k = celt_ilog2(x)>>1;
|
||||
x = VSHR32(x, (k-7)<<1);
|
||||
/* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
|
||||
n = x-32768;
|
||||
/* Get a rough initial guess for the root.
|
||||
|
@ -210,15 +207,21 @@ static inline celt_word32 celt_rsqrt(celt_word32 x)
|
|||
Range of y is [-1564,1594]. */
|
||||
r2 = MULT16_16_Q15(r, r);
|
||||
y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
|
||||
/* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). */
|
||||
rt = ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
|
||||
SUB16(MULT16_16_Q15(y, 12288), 16384))));
|
||||
/* rt is now the Q14 reciprocal square root of the Q16 x, with a maximum
|
||||
/* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
|
||||
This yields the Q14 reciprocal square root of the Q16 x, with a maximum
|
||||
relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
|
||||
peak absolute error of 2.26591/16384. */
|
||||
/* Most of the error in this function comes from the following shift: */
|
||||
rt = PSHR32(rt,k);
|
||||
return rt;
|
||||
return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
|
||||
SUB16(MULT16_16_Q15(y, 12288), 16384))));
|
||||
}
|
||||
|
||||
/** Reciprocal sqrt approximation (Q30 input, Q0 output or equivalent) */
|
||||
static inline celt_word32 celt_rsqrt(celt_word32 x)
|
||||
{
|
||||
int k;
|
||||
k = celt_ilog2(x)>>1;
|
||||
x = VSHR32(x, (k-7)<<1);
|
||||
return PSHR32(celt_rsqrt_norm(x), k);
|
||||
}
|
||||
|
||||
/** Sqrt approximation (QX input, QX/2 output) */
|
||||
|
|
|
@ -215,10 +215,21 @@ void find_spectral_pitch(const CELTMode *m, kiss_fftr_cfg fft, const struct PsyD
|
|||
Xr = MULT16_16_16(n, Xr);
|
||||
Xi = MULT16_16_16(n, Xi);
|
||||
#else
|
||||
n = celt_rsqrt(EPSILON+curve[i]);
|
||||
{
|
||||
celt_word32 t;
|
||||
#ifdef FIXED_POINT
|
||||
int k;
|
||||
#endif
|
||||
t = EPSILON+curve[i];
|
||||
#ifdef FIXED_POINT
|
||||
k = celt_ilog2(t)>>1;
|
||||
#endif
|
||||
t = VSHR32(t, (k-7)<<1);
|
||||
n = celt_rsqrt_norm(t);
|
||||
/* Pre-multiply X by n, so we can keep everything in 16 bits */
|
||||
Xr = EXTRACT16(SHR32(MULT16_16(n, Xr),3));
|
||||
Xi = EXTRACT16(SHR32(MULT16_16(n, Xi),3));
|
||||
Xr = EXTRACT16(PSHR32(MULT16_16(n, Xr),3+k));
|
||||
Xi = EXTRACT16(PSHR32(MULT16_16(n, Xi),3+k));
|
||||
}
|
||||
#endif
|
||||
/* Cross-spectrum between X and conj(Y) */
|
||||
*Xptr++ = ADD16(MULT16_16_Q15(Xr, Yptr[0]), MULT16_16_Q15(Xi,Yptr[1]));
|
||||
|
|
14
libcelt/vq.c
14
libcelt/vq.c
|
@ -103,13 +103,21 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K)
|
|||
static void normalise_residual(int * restrict iy, celt_norm * restrict X, int N, int K, celt_word32 Ryy)
|
||||
{
|
||||
int i;
|
||||
celt_word32 g;
|
||||
#ifdef FIXED_POINT
|
||||
int k;
|
||||
#endif
|
||||
celt_word32 t;
|
||||
celt_word16 g;
|
||||
|
||||
g = celt_rsqrt(Ryy);
|
||||
#ifdef FIXED_POINT
|
||||
k = celt_ilog2(Ryy)>>1;
|
||||
#endif
|
||||
t = VSHR32(Ryy, (k-7)<<1);
|
||||
g = celt_rsqrt_norm(t);
|
||||
|
||||
i=0;
|
||||
do
|
||||
X[i] = EXTRACT16(SHR32(MULT16_16(g, iy[i]),1));
|
||||
X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
|
||||
while (++i < N);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue