Misc changes to address Robert Sparks' comments

See http://www.ietf.org/mail-archive/web/codec/current/msg02833.html
Still more changes to come
This commit is contained in:
Jean-Marc Valin 2012-04-20 10:26:08 -04:00
parent 8365b5d00d
commit 72273000ec
9 changed files with 158 additions and 92 deletions

View file

@ -36,8 +36,7 @@ LIBPREFIX = lib
LIBSUFFIX = .a LIBSUFFIX = .a
OBJSUFFIX = .o OBJSUFFIX = .o
CC = $(TOOLCHAIN_PREFIX)gcc$(TOOLCHAIN_SUFFIX) CC = $(TOOLCHAIN_PREFIX)cc$(TOOLCHAIN_SUFFIX)
CXX = $(TOOLCHAIN_PREFIX)g++$(TOOLCHAIN_SUFFIX)
AR = $(TOOLCHAIN_PREFIX)ar AR = $(TOOLCHAIN_PREFIX)ar
RANLIB = $(TOOLCHAIN_PREFIX)ranlib RANLIB = $(TOOLCHAIN_PREFIX)ranlib
CP = $(TOOLCHAIN_PREFIX)cp CP = $(TOOLCHAIN_PREFIX)cp
@ -79,7 +78,6 @@ LDFLAGS += $(call ldflags-from-ldlibdirs,$(LDLIBDIRS))
LDLIBS += $(call ldlibs-from-libs,$(LIBS)) LDLIBS += $(call ldlibs-from-libs,$(LIBS))
COMPILE.c.cmdline = $(CC) -c $(CFLAGS) -o $@ $< COMPILE.c.cmdline = $(CC) -c $(CFLAGS) -o $@ $<
COMPILE.cpp.cmdline = $(CXX) -c $(CFLAGS) -o $@ $<
LINK.o = $(CC) $(LDPREFLAGS) $(LDFLAGS) LINK.o = $(CC) $(LDPREFLAGS) $(LDFLAGS)
LINK.o.cmdline = $(LINK.o) $^ $(LDLIBS) -o $@$(EXESUFFIX) LINK.o.cmdline = $(LINK.o) $^ $(LDLIBS) -o $@$(EXESUFFIX)

View file

@ -6,10 +6,11 @@ If this does not work, or if you want to change the default configuration (e.g.,
to compile for a fixed-point architecture), simply edit the options in the to compile for a fixed-point architecture), simply edit the options in the
Makefile. Makefile.
To build from the git repository instead of using this draft, follow these To build from the git repository instead of using this RFC, follow these
steps: steps:
1) Clone the repository: 1) Clone the repository (latest implementation of this standard at the time
of publication)
% git clone git://git.opus-codec.org/opus.git % git clone git://git.opus-codec.org/opus.git
% cd opus % cd opus

View file

@ -99,8 +99,7 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)),
EXTRACT16(VSHR32(X[j+c*N],shift))); EXTRACT16(VSHR32(X[j+c*N],shift)));
} while (++j<M*eBands[i+1]); } while (++j<M*eBands[i+1]);
/* We're adding one here to make damn sure we never end up with a pitch vector that's /* We're adding one here to ensure the normalized band isn't larger than unity norm */
larger than unity norm */
bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
} else { } else {
bandE[i+c*m->nbEBands] = EPSILON; bandE[i+c*m->nbEBands] = EPSILON;

View file

@ -37,19 +37,6 @@
extern "C" { extern "C" {
#endif #endif
/*
ATTENTION!
If you would like a :
-- a utility that will handle the caching of fft objects
-- real-only (no imaginary time component ) FFT
-- a multi-dimensional FFT
-- a command-line utility to perform ffts
-- a command-line utility to perform fast-convolution filtering
Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
in the tools/ directory.
*/
#ifdef USE_SIMD #ifdef USE_SIMD
# include <xmmintrin.h> # include <xmmintrin.h>
# define kiss_fft_scalar __m128 # define kiss_fft_scalar __m128

View file

@ -70,14 +70,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
opus_val16 gain, theta; opus_val16 gain, theta;
int stride2=0; int stride2=0;
int factor; int factor;
/*int i;
if (len>=30)
{
for (i=0;i<len;i++)
X[i] = 0;
X[14] = 1;
K=5;
}*/
if (2*K>=len || spread==SPREAD_NONE) if (2*K>=len || spread==SPREAD_NONE)
return; return;
factor = SPREAD_FACTOR[spread-1]; factor = SPREAD_FACTOR[spread-1];
@ -91,9 +84,8 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
if (len>=8*stride) if (len>=8*stride)
{ {
stride2 = 1; stride2 = 1;
/* This is just a simple way of computing sqrt(len/stride) with rounding. /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
It's basically incrementing as long as (stride2+0.5)^2 < len/stride. It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */
I _think_ it is bit-exact */
while ((stride2*stride2+stride2)*stride + (stride>>2) < len) while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
stride2++; stride2++;
} }
@ -113,13 +105,6 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
exp_rotation1(X+i*len, len, stride2, s, -c); exp_rotation1(X+i*len, len, stride2, s, -c);
} }
} }
/*if (len>=30)
{
for (i=0;i<len;i++)
printf ("%f ", X[i]);
printf ("\n");
exit(0);
}*/
} }
/** Takes the pitch vector and the decoded residual vector, computes the gain /** Takes the pitch vector and the decoded residual vector, computes the gain
@ -233,7 +218,6 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
while (++j<N); while (++j<N);
sum = QCONST16(1.f,14); sum = QCONST16(1.f,14);
} }
/* Do we have sufficient accuracy here? */
rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
j=0; do { j=0; do {
#ifdef FIXED_POINT #ifdef FIXED_POINT

View file

@ -80,8 +80,8 @@ Opus uses both linear prediction (LP) and the Modified Discrete Cosine
The Opus codec is a real-time interactive audio codec designed to meet the requirements The Opus codec is a real-time interactive audio codec designed to meet the requirements
described in <xref target="requirements"></xref>. described in <xref target="requirements"></xref>.
It is composed of a linear It is composed of a linear
prediction (LP)-based layer and a Modified Discrete Cosine Transform prediction (LP)-based <xref target="LPC"/> layer and a Modified Discrete Cosine Transform
(MDCT)-based layer. (MDCT)-based <xref target="MDCT"/> layer.
The main idea behind using two layers is that in speech, linear prediction The main idea behind using two layers is that in speech, linear prediction
techniques (such as CELP) code low frequencies more efficiently than transform techniques (such as CELP) code low frequencies more efficiently than transform
(e.g., MDCT) domain techniques, while the situation is reversed for music and (e.g., MDCT) domain techniques, while the situation is reversed for music and
@ -273,8 +273,7 @@ Therefore, if an application wishes to process a signal sampled at 32&nbsp;kHz,
</t> </t>
<t> <t>
The LP layer is based on the The LP layer is based on the SILK codec
<eref target='http://developer.skype.com/silk'>SILK</eref> codec
<xref target="SILK"></xref>. <xref target="SILK"></xref>.
It supports NB, MB, or WB audio and frame sizes from 10&nbsp;ms to 60&nbsp;ms, It supports NB, MB, or WB audio and frame sizes from 10&nbsp;ms to 60&nbsp;ms,
and requires an additional 5&nbsp;ms look-ahead for noise shaping estimation. and requires an additional 5&nbsp;ms look-ahead for noise shaping estimation.
@ -290,9 +289,7 @@ This document does not serve to define that format, but those interested in the
</t> </t>
<t> <t>
The MDCT layer is based on the The MDCT layer is based on the CELT codec <xref target="CELT"></xref>.
<eref target='http://www.celt-codec.org/'>CELT</eref> codec
<xref target="CELT"></xref>.
It supports NB, WB, SWB, or FB audio and frame sizes from 2.5&nbsp;ms to It supports NB, WB, SWB, or FB audio and frame sizes from 2.5&nbsp;ms to
20&nbsp;ms, and requires an additional 2.5&nbsp;ms look-ahead due to the 20&nbsp;ms, and requires an additional 2.5&nbsp;ms look-ahead due to the
overlapping MDCT windows. overlapping MDCT windows.
@ -436,7 +433,7 @@ encoder, the complexity is selected using an integer from 0 to 10, where
0 is the lowest complexity and 10 is the highest. Examples of 0 is the lowest complexity and 10 is the highest. Examples of
computations for which such trade-offs may occur are: computations for which such trade-offs may occur are:
<list style="symbols"> <list style="symbols">
<t>The order of the pitch analysis whitening filter,</t> <t>The order of the pitch analysis whitening filter <xref target="Whitening"/>,</t>
<t>The order of the short-term noise shaping filter,</t> <t>The order of the short-term noise shaping filter,</t>
<t>The number of states in delayed decision quantization of the <t>The number of states in delayed decision quantization of the
residual signal, and</t> residual signal, and</t>
@ -474,9 +471,8 @@ the default. However, in some (rare) applications, constant bitrate (CBR)
is required. There are two main reasons to operate in CBR mode: is required. There are two main reasons to operate in CBR mode:
<list style="symbols"> <list style="symbols">
<t>When the transport only supports a fixed size for each compressed frame</t> <t>When the transport only supports a fixed size for each compressed frame</t>
<t>When security is important <spanx style="emph">and</spanx> the input audio <t>When encryption is used for an audio stream that is either highly constrained
not a normal conversation but is highly constrained (e.g. yes/no, recorded prompts) (e.g. yes/no, recorded prompts) or highly sensitive <xref target="SRTP-VBR"></xref> </t>
<xref target="SRTP-VBR"></xref> </t>
</list> </list>
When low-latency transmission is required over a relatively slow connection, then When low-latency transmission is required over a relatively slow connection, then
@ -734,9 +730,9 @@ This makes, for example, a 2-byte code 2 packet with a second byte in the range
</figure> </figure>
</section> </section>
<section title="Code 3: An Arbitrary Number of Frames in the Packet"> <section title="Code 3: A Signaled Number of Frames in the Packet">
<t> <t>
Code 3 packets may encode an arbitrary number of frames, as well as additional Code 3 packets signal the number of frames, as well as additional
padding, called "Opus padding" to indicate that this padding is added at the padding, called "Opus padding" to indicate that this padding is added at the
Opus layer, rather than at the transport layer. Opus layer, rather than at the transport layer.
Code 3 packets MUST have at least 2 bytes. Code 3 packets MUST have at least 2 bytes.
@ -1271,10 +1267,10 @@ The raw bits used by the CELT layer are packed at the end of the packet, with
The reference implementation reads them using ec_dec_bits() (entdec.c). The reference implementation reads them using ec_dec_bits() (entdec.c).
Because the range decoder must read several bytes ahead in the stream, as Because the range decoder must read several bytes ahead in the stream, as
described in <xref target="range-decoder-renorm"/>, the input consumed by the described in <xref target="range-decoder-renorm"/>, the input consumed by the
raw bits MAY overlap with the input consumed by the range coder, and a decoder raw bits may overlap with the input consumed by the range coder, and a decoder
MUST allow this. MUST allow this.
The format should render it impossible to attempt to read more raw bits than The format should render it impossible to attempt to read more raw bits than
there are actual bits in the frame, though a decoder MAY wish to check for there are actual bits in the frame, though a decoder may wish to check for
this and report an error. this and report an error.
</t> </t>
</section> </section>
@ -1388,9 +1384,9 @@ Reading raw bits increases nbits_total by the number of raw bits read.
<section anchor="ec_tell" title="ec_tell()"> <section anchor="ec_tell" title="ec_tell()">
<t> <t>
The whole number of bits buffered in rng may be estimated via l = ilog(rng). The whole number of bits buffered in rng may be estimated via lg = ilog(rng).
ec_tell() then becomes a simple matter of removing these bits from the total. ec_tell() then becomes a simple matter of removing these bits from the total.
It returns (nbits_total - l). It returns (nbits_total - lg).
</t> </t>
<t> <t>
In a newly initialized decoder, before any symbols have been read, this reports In a newly initialized decoder, before any symbols have been read, this reports
@ -1403,7 +1399,7 @@ This is the bit reserved for termination of the encoder.
<t> <t>
ec_tell_frac() estimates the number of bits buffered in rng to fractional ec_tell_frac() estimates the number of bits buffered in rng to fractional
precision. precision.
Since rng must be greater than 2**23 after renormalization, l must be at least Since rng must be greater than 2**23 after renormalization, lg must be at least
24. 24.
Let Let
<figure align="center"> <figure align="center">
@ -1414,7 +1410,7 @@ r_Q15 = rng >> (l-16) ,
</figure> </figure>
so that 32768 &lt;= r_Q15 &lt; 65536, an unsigned Q15 value representing the so that 32768 &lt;= r_Q15 &lt; 65536, an unsigned Q15 value representing the
fractional part of rng. fractional part of rng.
Then the following procedure can be used to add one bit of precision to l. Then the following procedure can be used to add one bit of precision to lg.
First, update First, update
<figure align="center"> <figure align="center">
<artwork align="center"> <artwork align="center">
@ -1422,11 +1418,11 @@ First, update
r_Q15 = (r_Q15*r_Q15) >> 15 . r_Q15 = (r_Q15*r_Q15) >> 15 .
]]></artwork> ]]></artwork>
</figure> </figure>
Then add the 16th bit of r_Q15 to l via Then add the 16th bit of r_Q15 to lg via
<figure align="center"> <figure align="center">
<artwork align="center"> <artwork align="center">
<![CDATA[ <![CDATA[
l = 2*l + (r_Q15 >> 16) . lg = 2*lg + (r_Q15 >> 16) .
]]></artwork> ]]></artwork>
</figure> </figure>
Finally, if this bit was a 1, reduce r_Q15 by a factor of two via Finally, if this bit was a 1, reduce r_Q15 by a factor of two via
@ -1439,8 +1435,8 @@ r_Q15 = r_Q15 >> 1 ,
so that it once again lies in the range 32768 &lt;= r_Q15 &lt; 65536. so that it once again lies in the range 32768 &lt;= r_Q15 &lt; 65536.
</t> </t>
<t> <t>
This procedure is repeated three times to extend l to 1/8th bit precision. This procedure is repeated three times to extend lg to 1/8th bit precision.
ec_tell_frac() then returns (nbits_total*8 - l). ec_tell_frac() then returns (nbits_total*8 - lg).
</t> </t>
</section> </section>
@ -5301,7 +5297,7 @@ resolution is shown in the tables below.
<t> <t>
A negative TF adjustment means that the temporal resolution is increased, A negative TF adjustment means that the temporal resolution is increased,
while a positive TF adjustment means that the frequency resolution is increased. while a positive TF adjustment means that the frequency resolution is increased.
Changes in TF resolution are implemented using the Hadamard transform. To increase Changes in TF resolution are implemented using the Hadamard transform <xref target="Hadamard"/>. To increase
the time resolution by N, N "levels" of the Hadamard transform are applied to the the time resolution by N, N "levels" of the Hadamard transform are applied to the
decoded vector for each interleaved MDCT vector. To increase the frequency resolution decoded vector for each interleaved MDCT vector. To increase the frequency resolution
(assumes a transient frame), then N levels of the Hadamard transform are applied (assumes a transient frame), then N levels of the Hadamard transform are applied
@ -5459,9 +5455,9 @@ artifact than if the frame were dropped after decoding.
<t> <t>
A decoder MAY employ a more sophisticated drift compensation method. For A decoder MAY employ a more sophisticated drift compensation method. For
example, the example, the
<eref target='http://code.google.com/p/webrtc/source/browse/trunk/src/modules/audio_coding/NetEQ/main/source/?r=583'>NetEQ component</eref> <xref target='Google-NetEQ'>NetEQ component</xref>
of the of the
<eref target='http://code.google.com/p/webrtc/'>WebRTC.org codebase</eref> <xref target='Google-WebRTC'>Google WebRTC codebase</xref>
compensates for drift by adding or removing compensates for drift by adding or removing
one period when the signal is highly periodic. The reference implementation of one period when the signal is highly periodic. The reference implementation of
Opus allows a caller to learn whether the current frame's signal is highly Opus allows a caller to learn whether the current frame's signal is highly
@ -6822,7 +6818,7 @@ of the scalar quantizer, and as a result the quantization error of
each value depends on the quantization decision of the previous value. each value depends on the quantization decision of the previous value.
This dependency is exploited by the delayed decision mechanism to This dependency is exploited by the delayed decision mechanism to
search for a quantization sequence with best R/D performance search for a quantization sequence with best R/D performance
with a Viterbi-like algorithm . with a Viterbi-like algorithm <xref target="Viterbi"/>.
The quantizer processes the residual LSF vector in reverse order The quantizer processes the residual LSF vector in reverse order
(i.e., it starts with the highest residual LSF value). (i.e., it starts with the highest residual LSF value).
This is done because the prediction works slightly This is done because the prediction works slightly
@ -7274,14 +7270,15 @@ are built and &lt;vector path&gt; is the directory containing the test vectors.
<section title="Opus Custom"> <section title="Opus Custom">
<t> <t>
To complement the Opus specification, the "Opus Custom" codec is defined to Opus Custom is an OPTIONAL part of the specification that is defined to
handle special sample rates and frame rates that are not supported by the handle special sample rates and frame rates that are not supported by the
main Opus specification. Use of Opus Custom is discouraged for all but very main Opus specification. Use of Opus Custom is discouraged for all but very
special applications for which a frame size different from 2.5, 5, 10, or 20&nbsp;ms is special applications for which a frame size different from 2.5, 5, 10, or 20&nbsp;ms is
needed (for either complexity or latency reasons). Such applications will not needed (for either complexity or latency reasons). Because Opus Custom is
be compatible with the "main" Opus codec. In Opus Custom operation, optional, applications using that part of the specification may not be compatible
only the CELT layer is available, which is available using the celt_* function with other applications implementing Opus. In Opus Custom operation,
calls in celt.h. only the CELT layer is available, using the opus_custom_* function
calls in opus_custom.h.
</t> </t>
</section> </section>
@ -7338,7 +7335,7 @@ Sending the decoder packets generated by a version of the reference encoder
</t> </t>
</list> </list>
In all of the conditions above, both the encoder and the decoder were run In all of the conditions above, both the encoder and the decoder were run
inside the <eref target="http://valgrind.org/">Valgrind</eref> memory inside the <xref target="Valgrind">Valgrind</xref> memory
debugger, which tracks reads and writes to invalid memory regions as well as debugger, which tracks reads and writes to invalid memory regions as well as
the use of uninitialized memory. the use of uninitialized memory.
There were no errors reported on any of the tested conditions. There were no errors reported on any of the tested conditions.
@ -7407,7 +7404,7 @@ name of work, or endorsement information.</t>
<format type='TXT' target='http://tools.ietf.org/rfc/rfc6366.txt' /> <format type='TXT' target='http://tools.ietf.org/rfc/rfc6366.txt' />
</reference> </reference>
<reference anchor='SILK'> <reference anchor='SILK' target='http://developer.skype.com/silk'>
<front> <front>
<title>SILK Speech Codec</title> <title>SILK Speech Codec</title>
<author initials='K.' surname='Vos' fullname='K. Vos'> <author initials='K.' surname='Vos' fullname='K. Vos'>
@ -7442,7 +7439,7 @@ Robust and Efficient Quantization of Speech LSP Parameters Using Structured Vect
<seriesInfo name="ICASSP-1991, Proc. IEEE Int. Conf. Acoust., Speech, Signal Processing, pp. 641-644, October" value="1991"/> <seriesInfo name="ICASSP-1991, Proc. IEEE Int. Conf. Acoust., Speech, Signal Processing, pp. 641-644, October" value="1991"/>
</reference> </reference>
<reference anchor='CELT'> <reference anchor='CELT' target='http://celt-codec.org/'>
<front> <front>
<title>Constrained-Energy Lapped Transform (CELT) Codec</title> <title>Constrained-Energy Lapped Transform (CELT) Codec</title>
<author initials='J-M.' surname='Valin' fullname='J-M. Valin'> <author initials='J-M.' surname='Valin' fullname='J-M. Valin'>
@ -7472,8 +7469,8 @@ Robust and Efficient Quantization of Speech LSP Parameters Using Structured Vect
<abstract> <abstract>
<t></t> <t></t>
</abstract></front> </abstract></front>
<seriesInfo name='Internet-Draft' value='draft-ietf-avtcore-srtp-vbr-audio-03' /> <seriesInfo name='RFC' value='6562' />
<format type='TXT' target='http://tools.ietf.org/html/draft-ietf-avtcore-srtp-vbr-audio-03' /> <format type='TXT' target='http://tools.ietf.org/html/rfc6562' />
</reference> </reference>
<reference anchor='DOS'> <reference anchor='DOS'>
@ -7536,6 +7533,98 @@ Robust and Efficient Quantization of Speech LSP Parameters Using Structured Vect
<seriesInfo name="IEEE Trans. on Information Theory, Vol. 32" value="pp. 568-583" /> <seriesInfo name="IEEE Trans. on Information Theory, Vol. 32" value="pp. 568-583" />
</reference> </reference>
<reference anchor="Valgrind" target="http://valgrind.org/">
<front>
<title>Valgrind website</title>
<author></author>
</front>
</reference>
<reference anchor="Google-NetEQ" target="http://code.google.com/p/webrtc/source/browse/trunk/src/modules/audio_coding/NetEQ/main/source/?r=583">
<front>
<title>Google NetEQ code</title>
<author></author>
</front>
</reference>
<reference anchor="Google-WebRTC" target="http://code.google.com/p/webrtc/">
<front>
<title>Google WebRTC code</title>
<author></author>
</front>
</reference>
<reference anchor="Opus-git" target="git://git.xiph.org/opus.git">
<front>
<title>Opus Git Repository</title>
<author></author>
</front>
</reference>
<reference anchor="Opus-website" target="http://opus-codec.org/">
<front>
<title>Opus website</title>
<author></author>
</front>
</reference>
<reference anchor="Vectors-website" target="http://opus-codec.org/testvectors/">
<front>
<title>Opus Testvectors (website)</title>
<author></author>
</front>
</reference>
<reference anchor="Vectors-proc" target="http://www.ietf.org/proceedings/83/slides/slides-83-codec-0.gz">
<front>
<title>Opus Testvectors (proceedings)</title>
<author></author>
</front>
</reference>
<reference anchor="Hadamard" target="http://en.wikipedia.org/wiki/Hadamard_transform">
<front>
<title>Hadamard Transform</title>
<author><organization>Wikipedia</organization></author>
</front>
</reference>
<reference anchor="Viterbi" target="http://en.wikipedia.org/wiki/Viterbi_algorithm">
<front>
<title>Viterbi Algorithm</title>
<author><organization>Wikipedia</organization></author>
</front>
</reference>
<reference anchor="Whitening" target="http://en.wikipedia.org/wiki/White_noise">
<front>
<title>White Noise</title>
<author><organization>Wikipedia</organization></author>
</front>
</reference>
<reference anchor="LPC" target="http://en.wikipedia.org/wiki/Linear_prediction">
<front>
<title>Linear Prediction</title>
<author><organization>Wikipedia</organization></author>
</front>
</reference>
<reference anchor="MDCT" target="http://en.wikipedia.org/wiki/Modified_discrete_cosine_transform">
<front>
<title>Modified Discrete Cosine Transform</title>
<author><organization>Wikipedia</organization></author>
</front>
</reference>
<reference anchor="FFT" target="http://en.wikipedia.org/wiki/Fast_Fourier_transform">
<front>
<title>Fast Fourier Transform</title>
<author><organization>Wikipedia</organization></author>
</front>
</reference>
</references> </references>
<section anchor="ref-implementation" title="Reference Implementation"> <section anchor="ref-implementation" title="Reference Implementation">
@ -7551,7 +7640,7 @@ available in the README file.
<t>The implementation can be compiled with either a C89 or a C99 <t>The implementation can be compiled with either a C89 or a C99
compiler. It is reasonably optimized for most platforms such that compiler. It is reasonably optimized for most platforms such that
only architecture-specific optimizations are likely to be useful. only architecture-specific optimizations are likely to be useful.
The FFT used is a slightly modified version of the KISS-FFT library, The FFT <xref target="FFT"/> used is a slightly modified version of the KISS-FFT library,
but it is easy to substitute any other FFT library. but it is easy to substitute any other FFT library.
</t> </t>
@ -7586,7 +7675,7 @@ following command line:
<list style="symbols"> <list style="symbols">
<t><![CDATA[ <t><![CDATA[
cat draft-ietf-codec-opus.txt | grep '^\ \ \ ###' | sed -e 's/\s\s\s###//' | base64 -d > opus_source.tar.gz cat draft-ietf-codec-opus.txt | grep '^\ \ \ ###' | sed -e 's/...###//' | base64 -d > opus_source.tar.gz
]]></t> ]]></t>
<t> <t>
tar xzvf opus_source.tar.gz tar xzvf opus_source.tar.gz
@ -7594,11 +7683,19 @@ tar xzvf opus_source.tar.gz
<t>cd opus_source</t> <t>cd opus_source</t>
<t>make</t> <t>make</t>
</list> </list>
On systems where the provided Makefile does not work, the following command line may be used to compile
the source code:
<list style="symbols">
<t><![CDATA[
cc -O2 -g -o opus_demo src/opus_demo.c `cat *.mk | grep -v fixed | sed -e 's/.*=//' -e 's/\\\\//'` -DOPUS_BUILD -Iinclude -Icelt -Isilk -Isilk/float -Drestrict= -lm
]]></t></list>
</t>
<t>
On systems where the base64 utility is not present, the following commands can be used instead: On systems where the base64 utility is not present, the following commands can be used instead:
<list style="symbols"> <list style="symbols">
<t><![CDATA[ <t><![CDATA[
cat draft-ietf-codec-opus.txt | grep '^\ \ \ ###' | sed -e 's/\s\s\s###//' > opus.b64 cat draft-ietf-codec-opus.txt | grep '^\ \ \ ###' | sed -e 's/...###//' > opus.b64
]]></t> ]]></t>
<t>openssl base64 -d -in opus.b64 > opus_source.tar.gz</t> <t>openssl base64 -d -in opus.b64 > opus_source.tar.gz</t>
</list> </list>
@ -7606,12 +7703,13 @@ cat draft-ietf-codec-opus.txt | grep '^\ \ \ ###' | sed -e 's/\s\s\s###//' > opu
</t> </t>
</section> </section>
<section title="Development Versions"> <section title="Up-to-date Implementation">
<t> <t>
The current development version of the source code is available in a As of the time of publication of this memo, up-to-date source code implementing
<eref target='git://git.opus-codec.org/opus.git'>Git repository</eref>. this standard is available in a
Development snapshots are provided at <xref target='Opus-git'>Git repository</xref>.
<eref target='http://opus-codec.org/'/>. Releases and other resources are available at
<xref target='Opus-website'/>.
</t> </t>
</section> </section>
@ -7624,9 +7722,8 @@ Development snapshots are provided at
<section anchor="test-vectors" title="Test Vectors"> <section anchor="test-vectors" title="Test Vectors">
<t> <t>
Because of size constraints, the Opus test vectors are not distributed in this Because of size constraints, the Opus test vectors are not distributed in this
draft. They are available from the Opus codec website at draft. They are available in the proceedings of the 83rd IETF meeting (Paris) <xref target="Vectors-proc"/> and from the Opus codec website at
<eref target="http://opus-codec.org/testvectors/"/> and will also be made available <xref target="Vectors-website"/>. These test vectors were created specifically to exercise
in IETF meeting proceedings. These test vectors were created specifically to exercise
all aspects of the decoder and therefore the audio quality of the decoded output is all aspects of the decoder and therefore the audio quality of the decoded output is
significantly lower than what Opus can achieve in normal operation. significantly lower than what Opus can achieve in normal operation.
</t> </t>

View file

@ -120,7 +120,7 @@ extern "C" {
#define OPUS_AUTO -1000 /**<Auto/default setting @hideinitializer*/ #define OPUS_AUTO -1000 /**<Auto/default setting @hideinitializer*/
#define OPUS_BITRATE_MAX -1 /**<Maximum bitrate @hideinitializer*/ #define OPUS_BITRATE_MAX -1 /**<Maximum bitrate @hideinitializer*/
/** Best for "standard" VoIP/videoconference applications where listening quality and intelligibility matter most /** Best for most VoIP/videoconference applications where listening quality and intelligibility matter most
* @hideinitializer */ * @hideinitializer */
#define OPUS_APPLICATION_VOIP 2048 #define OPUS_APPLICATION_VOIP 2048
/** Best for broadcast/high-fidelity application where the decoded audio should be as close as possible to the input /** Best for broadcast/high-fidelity application where the decoded audio should be as close as possible to the input

View file

@ -292,7 +292,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
if (st->prev_mode==MODE_CELT_ONLY) if (st->prev_mode==MODE_CELT_ONLY)
silk_InitDecoder( silk_dec ); silk_InitDecoder( silk_dec );
/* The SILK PLC cannot support produce frames of less than 10 ms */ /* The SILK PLC cannot produce frames of less than 10 ms */
st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs); st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);
if (data != NULL) if (data != NULL)
@ -574,7 +574,7 @@ static int opus_packet_parse_impl(const unsigned char *data, int len,
last_size = len-size[0]; last_size = len-size[0];
break; break;
/* Multiple CBR/VBR frames (from 0 to 120 ms) */ /* Multiple CBR/VBR frames (from 0 to 120 ms) */
case 3: default: /*case 3:*/
if (len<1) if (len<1)
return OPUS_INVALID_PACKET; return OPUS_INVALID_PACKET;
/* Number of frames encoded in bits 0 to 5 */ /* Number of frames encoded in bits 0 to 5 */

View file

@ -36,7 +36,7 @@ echo Testing mono
echo "==============" echo "=============="
echo echo
for file in `seq -w 1 11` for file in 01 02 03 04 05 06 07 08 09 10 11 12
do do
if [ -e $VECTOR_PATH/testvector$file.bit ]; then if [ -e $VECTOR_PATH/testvector$file.bit ]; then
echo Testing testvector$file echo Testing testvector$file
@ -66,7 +66,7 @@ echo Testing stereo
echo "==============" echo "=============="
echo echo
for file in `seq -w 1 11` for file in 01 02 03 04 05 06 07 08 09 10 11 12
do do
if [ -e $VECTOR_PATH/testvector$file.bit ]; then if [ -e $VECTOR_PATH/testvector$file.bit ]; then
echo Testing testvector$file echo Testing testvector$file