Cleanup, de-inlining some math functions
This commit is contained in:
parent
802f6eab9b
commit
48923ae996
7 changed files with 204 additions and 227 deletions
|
@ -15,7 +15,7 @@ lib_LTLIBRARIES = libcelt@LIBCELT_SUFFIX@.la
|
||||||
|
|
||||||
# Sources for compilation in the library
|
# Sources for compilation in the library
|
||||||
libcelt@LIBCELT_SUFFIX@_la_SOURCES = bands.c celt.c cwrs.c ecintrin.h entcode.c \
|
libcelt@LIBCELT_SUFFIX@_la_SOURCES = bands.c celt.c cwrs.c ecintrin.h entcode.c \
|
||||||
entdec.c entenc.c header.c kiss_fft.c laplace.c mdct.c \
|
entdec.c entenc.c header.c kiss_fft.c laplace.c mathops.c mdct.c \
|
||||||
modes.c pitch.c plc.c quant_bands.c rangedec.c rangeenc.c rate.c \
|
modes.c pitch.c plc.c quant_bands.c rangedec.c rangeenc.c rate.c \
|
||||||
vq.c
|
vq.c
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,22 @@
|
||||||
#include "mathops.h"
|
#include "mathops.h"
|
||||||
#include "rate.h"
|
#include "rate.h"
|
||||||
|
|
||||||
|
/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
|
||||||
|
with this approximation is important because it has an impact on the bit allocation */
|
||||||
|
static celt_int16 bitexact_cos(celt_int16 x)
|
||||||
|
{
|
||||||
|
celt_int32 tmp;
|
||||||
|
celt_int16 x2;
|
||||||
|
tmp = (4096+((celt_int32)(x)*(x)))>>13;
|
||||||
|
if (tmp > 32767)
|
||||||
|
tmp = 32767;
|
||||||
|
x2 = tmp;
|
||||||
|
x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
|
||||||
|
if (x2 > 32766)
|
||||||
|
x2 = 32766;
|
||||||
|
return 1+x2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef FIXED_POINT
|
#ifdef FIXED_POINT
|
||||||
/* Compute the amplitude (sqrt energy) in each of the bands */
|
/* Compute the amplitude (sqrt energy) in each of the bands */
|
||||||
|
|
179
libcelt/mathops.c
Normal file
179
libcelt/mathops.c
Normal file
|
@ -0,0 +1,179 @@
|
||||||
|
/* Copyright (c) 2002-2008 Jean-Marc Valin
|
||||||
|
Copyright (c) 2007-2008 CSIRO
|
||||||
|
Copyright (c) 2007-2009 Xiph.Org Foundation
|
||||||
|
Written by Jean-Marc Valin */
|
||||||
|
/**
|
||||||
|
@file mathops.h
|
||||||
|
@brief Various math functions
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
- Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
- Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
- Neither the name of the Xiph.org Foundation nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||||
|
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
#include "config.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "mathops.h"
|
||||||
|
|
||||||
|
#ifdef FIXED_POINT
|
||||||
|
|
||||||
|
celt_word32 frac_div32(celt_word32 a, celt_word32 b)
|
||||||
|
{
|
||||||
|
celt_word16 rcp;
|
||||||
|
celt_word32 result, rem;
|
||||||
|
int shift = 30-celt_ilog2(b);
|
||||||
|
a = SHL32(a,shift);
|
||||||
|
b = SHL32(b,shift);
|
||||||
|
|
||||||
|
/* 16-bit reciprocal */
|
||||||
|
rcp = ROUND16(celt_rcp(ROUND16(b,16)),2);
|
||||||
|
result = SHL32(MULT16_32_Q15(rcp, a),1);
|
||||||
|
rem = a-MULT32_32_Q31(result, b);
|
||||||
|
result += SHL32(MULT16_32_Q15(rcp, rem),1);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
|
||||||
|
celt_word16 celt_rsqrt_norm(celt_word32 x)
|
||||||
|
{
|
||||||
|
celt_word16 n;
|
||||||
|
celt_word16 r;
|
||||||
|
celt_word16 r2;
|
||||||
|
celt_word16 y;
|
||||||
|
/* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
|
||||||
|
n = x-32768;
|
||||||
|
/* Get a rough initial guess for the root.
|
||||||
|
The optimal minimax quadratic approximation (using relative error) is
|
||||||
|
r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
|
||||||
|
Coefficients here, and the final result r, are Q14.*/
|
||||||
|
r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
|
||||||
|
/* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
|
||||||
|
We can compute the result from n and r using Q15 multiplies with some
|
||||||
|
adjustment, carefully done to avoid overflow.
|
||||||
|
Range of y is [-1564,1594]. */
|
||||||
|
r2 = MULT16_16_Q15(r, r);
|
||||||
|
y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
|
||||||
|
/* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
|
||||||
|
This yields the Q14 reciprocal square root of the Q16 x, with a maximum
|
||||||
|
relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
|
||||||
|
peak absolute error of 2.26591/16384. */
|
||||||
|
return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
|
||||||
|
SUB16(MULT16_16_Q15(y, 12288), 16384))));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sqrt approximation (QX input, QX/2 output) */
|
||||||
|
celt_word32 celt_sqrt(celt_word32 x)
|
||||||
|
{
|
||||||
|
int k;
|
||||||
|
celt_word16 n;
|
||||||
|
celt_word32 rt;
|
||||||
|
static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
|
||||||
|
if (x==0)
|
||||||
|
return 0;
|
||||||
|
k = (celt_ilog2(x)>>1)-7;
|
||||||
|
x = VSHR32(x, (k<<1));
|
||||||
|
n = x-32768;
|
||||||
|
rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
||||||
|
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
|
||||||
|
rt = VSHR32(rt,7-k);
|
||||||
|
return rt;
|
||||||
|
}
|
||||||
|
|
||||||
|
#define L1 32767
|
||||||
|
#define L2 -7651
|
||||||
|
#define L3 8277
|
||||||
|
#define L4 -626
|
||||||
|
|
||||||
|
static inline celt_word16 _celt_cos_pi_2(celt_word16 x)
|
||||||
|
{
|
||||||
|
celt_word16 x2;
|
||||||
|
|
||||||
|
x2 = MULT16_16_P15(x,x);
|
||||||
|
return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
|
||||||
|
))))))));
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef L1
|
||||||
|
#undef L2
|
||||||
|
#undef L3
|
||||||
|
#undef L4
|
||||||
|
|
||||||
|
celt_word16 celt_cos_norm(celt_word32 x)
|
||||||
|
{
|
||||||
|
x = x&0x0001ffff;
|
||||||
|
if (x>SHL32(EXTEND32(1), 16))
|
||||||
|
x = SUB32(SHL32(EXTEND32(1), 17),x);
|
||||||
|
if (x&0x00007fff)
|
||||||
|
{
|
||||||
|
if (x<SHL32(EXTEND32(1), 15))
|
||||||
|
{
|
||||||
|
return _celt_cos_pi_2(EXTRACT16(x));
|
||||||
|
} else {
|
||||||
|
return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (x&0x0000ffff)
|
||||||
|
return 0;
|
||||||
|
else if (x&0x0001ffff)
|
||||||
|
return -32767;
|
||||||
|
else
|
||||||
|
return 32767;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Reciprocal approximation (Q15 input, Q16 output) */
|
||||||
|
celt_word32 celt_rcp(celt_word32 x)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
celt_word16 n;
|
||||||
|
celt_word16 r;
|
||||||
|
celt_assert2(x>0, "celt_rcp() only defined for positive values");
|
||||||
|
i = celt_ilog2(x);
|
||||||
|
/* n is Q15 with range [0,1). */
|
||||||
|
n = VSHR32(x,i-15)-32768;
|
||||||
|
/* Start with a linear approximation:
|
||||||
|
r = 1.8823529411764706-0.9411764705882353*n.
|
||||||
|
The coefficients and the result are Q14 in the range [15420,30840].*/
|
||||||
|
r = ADD16(30840, MULT16_16_Q15(-15420, n));
|
||||||
|
/* Perform two Newton iterations:
|
||||||
|
r -= r*((r*n)-1.Q15)
|
||||||
|
= r*((r*n)+(r-1.Q15)). */
|
||||||
|
r = SUB16(r, MULT16_16_Q15(r,
|
||||||
|
ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
|
||||||
|
/* We subtract an extra 1 in the second iteration to avoid overflow; it also
|
||||||
|
neatly compensates for truncation error in the rest of the process. */
|
||||||
|
r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
|
||||||
|
ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
|
||||||
|
/* r is now the Q15 solution to 2/(n+1), with a maximum relative error
|
||||||
|
of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
|
||||||
|
error of 1.24665/32768. */
|
||||||
|
return VSHR32(EXTEND32(r),i-16);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -42,66 +42,13 @@
|
||||||
#include "entcode.h"
|
#include "entcode.h"
|
||||||
#include "os_support.h"
|
#include "os_support.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef OVERRIDE_FIND_MAX16
|
|
||||||
static inline int find_max16(celt_word16 *x, int len)
|
|
||||||
{
|
|
||||||
celt_word16 max_corr=-VERY_LARGE16;
|
|
||||||
int i, id = 0;
|
|
||||||
for (i=0;i<len;i++)
|
|
||||||
{
|
|
||||||
if (x[i] > max_corr)
|
|
||||||
{
|
|
||||||
id = i;
|
|
||||||
max_corr = x[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef OVERRIDE_FIND_MAX32
|
|
||||||
static inline int find_max32(celt_word32 *x, int len)
|
|
||||||
{
|
|
||||||
celt_word32 max_corr=-VERY_LARGE32;
|
|
||||||
int i, id = 0;
|
|
||||||
for (i=0;i<len;i++)
|
|
||||||
{
|
|
||||||
if (x[i] > max_corr)
|
|
||||||
{
|
|
||||||
id = i;
|
|
||||||
max_corr = x[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
|
/* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
|
||||||
#define FRAC_MUL16(a,b) ((16384+((celt_int32)(celt_int16)(a)*(celt_int16)(b)))>>15)
|
#define FRAC_MUL16(a,b) ((16384+((celt_int32)(celt_int16)(a)*(celt_int16)(b)))>>15)
|
||||||
|
|
||||||
/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
|
|
||||||
with this approximation is important because it has an impact on the bit allocation */
|
|
||||||
static inline celt_int16 bitexact_cos(celt_int16 x)
|
|
||||||
{
|
|
||||||
celt_int32 tmp;
|
|
||||||
celt_int16 x2;
|
|
||||||
tmp = (4096+((celt_int32)(x)*(x)))>>13;
|
|
||||||
if (tmp > 32767)
|
|
||||||
tmp = 32767;
|
|
||||||
x2 = tmp;
|
|
||||||
x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
|
|
||||||
if (x2 > 32766)
|
|
||||||
x2 = 32766;
|
|
||||||
return 1+x2;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef FIXED_POINT
|
#ifndef FIXED_POINT
|
||||||
|
|
||||||
#define celt_sqrt(x) ((float)sqrt(x))
|
#define celt_sqrt(x) ((float)sqrt(x))
|
||||||
#define celt_psqrt(x) ((float)sqrt(x))
|
|
||||||
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
|
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
|
||||||
#define celt_rsqrt_norm(x) (celt_rsqrt(x))
|
#define celt_rsqrt_norm(x) (celt_rsqrt(x))
|
||||||
#define celt_acos acos
|
#define celt_acos acos
|
||||||
|
@ -195,119 +142,12 @@ static inline celt_int16 celt_zlog2(celt_word32 x)
|
||||||
return x <= 0 ? 0 : celt_ilog2(x);
|
return x <= 0 ? 0 : celt_ilog2(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
|
celt_word16 celt_rsqrt_norm(celt_word32 x);
|
||||||
static inline celt_word16 celt_rsqrt_norm(celt_word32 x)
|
|
||||||
{
|
|
||||||
celt_word16 n;
|
|
||||||
celt_word16 r;
|
|
||||||
celt_word16 r2;
|
|
||||||
celt_word16 y;
|
|
||||||
/* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
|
|
||||||
n = x-32768;
|
|
||||||
/* Get a rough initial guess for the root.
|
|
||||||
The optimal minimax quadratic approximation (using relative error) is
|
|
||||||
r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
|
|
||||||
Coefficients here, and the final result r, are Q14.*/
|
|
||||||
r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
|
|
||||||
/* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
|
|
||||||
We can compute the result from n and r using Q15 multiplies with some
|
|
||||||
adjustment, carefully done to avoid overflow.
|
|
||||||
Range of y is [-1564,1594]. */
|
|
||||||
r2 = MULT16_16_Q15(r, r);
|
|
||||||
y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
|
|
||||||
/* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
|
|
||||||
This yields the Q14 reciprocal square root of the Q16 x, with a maximum
|
|
||||||
relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
|
|
||||||
peak absolute error of 2.26591/16384. */
|
|
||||||
return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
|
|
||||||
SUB16(MULT16_16_Q15(y, 12288), 16384))));
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Reciprocal sqrt approximation (Q30 input, Q0 output or equivalent) */
|
celt_word32 celt_sqrt(celt_word32 x);
|
||||||
static inline celt_word32 celt_rsqrt(celt_word32 x)
|
|
||||||
{
|
|
||||||
int k;
|
|
||||||
k = celt_ilog2(x)>>1;
|
|
||||||
x = VSHR32(x, (k-7)<<1);
|
|
||||||
return PSHR32(celt_rsqrt_norm(x), k);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Sqrt approximation (QX input, QX/2 output) */
|
celt_word16 celt_cos_norm(celt_word32 x);
|
||||||
static inline celt_word32 celt_sqrt(celt_word32 x)
|
|
||||||
{
|
|
||||||
int k;
|
|
||||||
celt_word16 n;
|
|
||||||
celt_word32 rt;
|
|
||||||
static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
|
|
||||||
if (x==0)
|
|
||||||
return 0;
|
|
||||||
k = (celt_ilog2(x)>>1)-7;
|
|
||||||
x = VSHR32(x, (k<<1));
|
|
||||||
n = x-32768;
|
|
||||||
rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
|
||||||
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
|
|
||||||
rt = VSHR32(rt,7-k);
|
|
||||||
return rt;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Sqrt approximation (QX input, QX/2 output) that assumes that the input is
|
|
||||||
strictly positive */
|
|
||||||
static inline celt_word32 celt_psqrt(celt_word32 x)
|
|
||||||
{
|
|
||||||
int k;
|
|
||||||
celt_word16 n;
|
|
||||||
celt_word32 rt;
|
|
||||||
static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664};
|
|
||||||
k = (celt_ilog2(x)>>1)-7;
|
|
||||||
x = VSHR32(x, (k<<1));
|
|
||||||
n = x-32768;
|
|
||||||
rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
|
||||||
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
|
|
||||||
rt = VSHR32(rt,7-k);
|
|
||||||
return rt;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define L1 32767
|
|
||||||
#define L2 -7651
|
|
||||||
#define L3 8277
|
|
||||||
#define L4 -626
|
|
||||||
|
|
||||||
static inline celt_word16 _celt_cos_pi_2(celt_word16 x)
|
|
||||||
{
|
|
||||||
celt_word16 x2;
|
|
||||||
|
|
||||||
x2 = MULT16_16_P15(x,x);
|
|
||||||
return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
|
|
||||||
))))))));
|
|
||||||
}
|
|
||||||
|
|
||||||
#undef L1
|
|
||||||
#undef L2
|
|
||||||
#undef L3
|
|
||||||
#undef L4
|
|
||||||
|
|
||||||
static inline celt_word16 celt_cos_norm(celt_word32 x)
|
|
||||||
{
|
|
||||||
x = x&0x0001ffff;
|
|
||||||
if (x>SHL32(EXTEND32(1), 16))
|
|
||||||
x = SUB32(SHL32(EXTEND32(1), 17),x);
|
|
||||||
if (x&0x00007fff)
|
|
||||||
{
|
|
||||||
if (x<SHL32(EXTEND32(1), 15))
|
|
||||||
{
|
|
||||||
return _celt_cos_pi_2(EXTRACT16(x));
|
|
||||||
} else {
|
|
||||||
return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (x&0x0000ffff)
|
|
||||||
return 0;
|
|
||||||
else if (x&0x0001ffff)
|
|
||||||
return -32767;
|
|
||||||
else
|
|
||||||
return 32767;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline celt_word16 celt_log2(celt_word32 x)
|
static inline celt_word16 celt_log2(celt_word32 x)
|
||||||
{
|
{
|
||||||
|
@ -349,52 +189,11 @@ static inline celt_word32 celt_exp2(celt_word16 x)
|
||||||
return VSHR32(EXTEND32(frac), -integer-2);
|
return VSHR32(EXTEND32(frac), -integer-2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Reciprocal approximation (Q15 input, Q16 output) */
|
celt_word32 celt_rcp(celt_word32 x);
|
||||||
static inline celt_word32 celt_rcp(celt_word32 x)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
celt_word16 n;
|
|
||||||
celt_word16 r;
|
|
||||||
celt_assert2(x>0, "celt_rcp() only defined for positive values");
|
|
||||||
i = celt_ilog2(x);
|
|
||||||
/* n is Q15 with range [0,1). */
|
|
||||||
n = VSHR32(x,i-15)-32768;
|
|
||||||
/* Start with a linear approximation:
|
|
||||||
r = 1.8823529411764706-0.9411764705882353*n.
|
|
||||||
The coefficients and the result are Q14 in the range [15420,30840].*/
|
|
||||||
r = ADD16(30840, MULT16_16_Q15(-15420, n));
|
|
||||||
/* Perform two Newton iterations:
|
|
||||||
r -= r*((r*n)-1.Q15)
|
|
||||||
= r*((r*n)+(r-1.Q15)). */
|
|
||||||
r = SUB16(r, MULT16_16_Q15(r,
|
|
||||||
ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
|
|
||||||
/* We subtract an extra 1 in the second iteration to avoid overflow; it also
|
|
||||||
neatly compensates for truncation error in the rest of the process. */
|
|
||||||
r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
|
|
||||||
ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
|
|
||||||
/* r is now the Q15 solution to 2/(n+1), with a maximum relative error
|
|
||||||
of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
|
|
||||||
error of 1.24665/32768. */
|
|
||||||
return VSHR32(EXTEND32(r),i-16);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define celt_div(a,b) MULT32_32_Q31((celt_word32)(a),celt_rcp(b))
|
#define celt_div(a,b) MULT32_32_Q31((celt_word32)(a),celt_rcp(b))
|
||||||
|
|
||||||
static inline celt_word32 frac_div32(celt_word32 a, celt_word32 b)
|
celt_word32 frac_div32(celt_word32 a, celt_word32 b);
|
||||||
{
|
|
||||||
celt_word16 rcp;
|
|
||||||
celt_word32 result, rem;
|
|
||||||
int shift = 30-celt_ilog2(b);
|
|
||||||
a = SHL32(a,shift);
|
|
||||||
b = SHL32(b,shift);
|
|
||||||
|
|
||||||
/* 16-bit reciprocal */
|
|
||||||
rcp = ROUND16(celt_rcp(ROUND16(b,16)),2);
|
|
||||||
result = SHL32(MULT16_32_Q15(rcp, a),1);
|
|
||||||
rem = a-MULT32_32_Q31(result, b);
|
|
||||||
result += SHL32(MULT16_32_Q15(rcp, rem),1);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define M1 32767
|
#define M1 32767
|
||||||
#define M2 -21
|
#define M2 -21
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#define CELT_C
|
#define CELT_C
|
||||||
#include "../libcelt/stack_alloc.h"
|
#include "../libcelt/stack_alloc.h"
|
||||||
#include "../libcelt/kiss_fft.c"
|
#include "../libcelt/kiss_fft.c"
|
||||||
|
#include "../libcelt/mathops.c"
|
||||||
#include "../libcelt/entcode.c"
|
#include "../libcelt/entcode.c"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "mathops.h"
|
#include "mathops.c"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
@ -56,24 +56,6 @@ void testsqrt(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void testrsqrt(void)
|
|
||||||
{
|
|
||||||
celt_int32 i;
|
|
||||||
for (i=1;i<=2000000;i++)
|
|
||||||
{
|
|
||||||
double ratio;
|
|
||||||
celt_word16 val;
|
|
||||||
val = celt_rsqrt(i);
|
|
||||||
ratio = val*sqrt(i)/Q15ONE;
|
|
||||||
if (fabs(ratio - 1) > .05)
|
|
||||||
{
|
|
||||||
fprintf (stderr, "rsqrt failed: rsqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio);
|
|
||||||
ret = 1;
|
|
||||||
}
|
|
||||||
i+= i>>10;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef FIXED_POINT
|
#ifndef FIXED_POINT
|
||||||
void testlog2(void)
|
void testlog2(void)
|
||||||
{
|
{
|
||||||
|
@ -179,7 +161,6 @@ int main(void)
|
||||||
{
|
{
|
||||||
testdiv();
|
testdiv();
|
||||||
testsqrt();
|
testsqrt();
|
||||||
testrsqrt();
|
|
||||||
testlog2();
|
testlog2();
|
||||||
testexp2();
|
testexp2();
|
||||||
testexp2log2();
|
testexp2log2();
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include "../libcelt/kiss_fft.c"
|
#include "../libcelt/kiss_fft.c"
|
||||||
#include "../libcelt/mdct.c"
|
#include "../libcelt/mdct.c"
|
||||||
|
#include "../libcelt/mathops.c"
|
||||||
|
|
||||||
#ifndef M_PI
|
#ifndef M_PI
|
||||||
#define M_PI 3.141592653
|
#define M_PI 3.141592653
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue