Adds 24-bit Opus encoder/decoder API

This commit is contained in:
Jean-Marc Valin 2024-06-12 12:57:48 -04:00
parent 8b16ce9055
commit 977ee01670
No known key found for this signature in database
GPG key ID: 8D2952BBB52C646D
6 changed files with 341 additions and 42 deletions

View file

@ -166,6 +166,7 @@ typedef opus_val16 opus_res;
#define RES2VAL16(a) RES2INT16(a)
#define FLOAT2SIG(a) float2int(((opus_int32)32768<<SIG_SHIFT)*(a))
#define INT16TOSIG(a) SHL32(EXTEND32(a), SIG_SHIFT)
/* Same conversion as INT16TOSIG() but with a shift that is 8 bits smaller,
   i.e. the input is treated as carrying 8 more bits than a 16-bit sample. */
#define INT24TOSIG(a) SHL32(a, SIG_SHIFT-8)
#define celt_isnan(x) 0
@ -332,6 +333,7 @@ static OPUS_INLINE int celt_isnan(float x)
#define RES2VAL16(a) (a)
#define FLOAT2SIG(a) ((a)*CELT_SIG_SCALE)
#define INT16TOSIG(a) ((float)(a))
/* Scales the sample by 1/256 relative to INT16TOSIG(), so a 24-bit input
   lands on the same scale as a 16-bit one. */
#define INT24TOSIG(a) ((float)(a)*(1.f/256.f))
#endif /* !FIXED_POINT */

View file

@ -268,6 +268,42 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode(
opus_int32 max_data_bytes
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
/** Encodes an Opus frame from 24-bit integer input stored in 32-bit words.
* @param [in] st <tt>OpusEncoder*</tt>: Encoder state
* @param [in] pcm <tt>opus_int32*</tt>: Input signal (interleaved if 2 channels) representing (or slightly exceeding) 24-bit values. length is frame_size*channels*sizeof(opus_int32)
* @param [in] frame_size <tt>int</tt>: Number of samples per channel in the
* input signal.
* This must be an Opus frame size for
* the encoder's sampling rate.
* For example, at 48 kHz the permitted
* values are 120, 240, 480, 960, 1920,
* and 2880.
* Passing in a duration of less than
* 10 ms (480 samples at 48 kHz) will
* prevent the encoder from using the LPC
* or hybrid modes.
* @param [out] data <tt>unsigned char*</tt>: Output payload.
* This must contain storage for at
* least \a max_data_bytes.
* @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated
* memory for the output
* payload. This may be
* used to impose an upper limit on
* the instant bitrate, but should
* not be used as the only bitrate
* control. Use #OPUS_SET_BITRATE to
* control the bitrate.
* @returns The length of the encoded packet (in bytes) on success or a
* negative error code (see @ref opus_errorcodes) on failure.
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode24(
OpusEncoder *st,
const opus_int32 *pcm,
int frame_size,
unsigned char *data,
opus_int32 max_data_bytes
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4);
/** Encodes an Opus frame from floating point input.
* @param [in] st <tt>OpusEncoder*</tt>: Encoder state
* @param [in] pcm <tt>float*</tt>: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0.
@ -483,6 +519,31 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode(
int decode_fec
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
/** Decode an Opus packet with 24-bit integer output stored in 32-bit words.
* @param [in] st <tt>OpusDecoder*</tt>: Decoder state
* @param [in] data <tt>const unsigned char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
* @param [in] len <tt>opus_int32</tt>: Number of bytes in payload
* @param [out] pcm <tt>opus_int32*</tt>: Output signal (interleaved if 2 channels) representing (or slightly exceeding) 24-bit values. length
* is frame_size*channels*sizeof(opus_int32)
* @param [in] frame_size <tt>int</tt>: Number of samples per channel of available space in \a pcm.
* If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will
* not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1),
* then frame_size needs to be exactly the duration of audio that is missing, otherwise the
* decoder will not be in the optimal state to decode the next incoming packet. For the PLC and
* FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms.
* @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be
* decoded. If no such data is available, the frame is decoded as if it were lost.
* @returns Number of decoded samples or @ref opus_errorcodes
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode24(
OpusDecoder *st,
const unsigned char *data,
opus_int32 len,
opus_int32 *pcm,
int frame_size,
int decode_fec
) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4);
/** Decode an Opus packet with floating point output.
* @param [in] st <tt>OpusDecoder*</tt>: Decoder state
* @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss
@ -596,7 +657,7 @@ OPUS_EXPORT int opus_dred_parse(OpusDREDDecoder *dred_dec, OpusDRED *dred, const
*/
OPUS_EXPORT int opus_dred_process(OpusDREDDecoder *dred_dec, const OpusDRED *src, OpusDRED *dst);
/** Decode audio from an Opus DRED packet with floating point output.
/** Decode audio from an Opus DRED packet with 16-bit output.
* @param [in] st <tt>OpusDecoder*</tt>: Decoder state
* @param [in] dred <tt>OpusDRED*</tt>: DRED state
* @param [in] dred_offset <tt>opus_int32</tt>: position of the redundancy to decode (in samples before the beginning of the real audio data in the packet).
@ -608,6 +669,18 @@ OPUS_EXPORT int opus_dred_process(OpusDREDDecoder *dred_dec, const OpusDRED *src
*/
OPUS_EXPORT int opus_decoder_dred_decode(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int16 *pcm, opus_int32 frame_size);
/** Decode audio from an Opus DRED packet with 24-bit output.
* @param [in] st <tt>OpusDecoder*</tt>: Decoder state
* @param [in] dred <tt>OpusDRED*</tt>: DRED state
* @param [in] dred_offset <tt>opus_int32</tt>: position of the redundancy to decode (in samples before the beginning of the real audio data in the packet).
* @param [out] pcm <tt>opus_int32*</tt>: Output signal (interleaved if 2 channels). length
* is frame_size*channels*sizeof(opus_int32)
* @param [in] frame_size <tt>opus_int32</tt>: Number of samples per channel to decode in \a pcm.
* frame_size <b>must</b> be a multiple of 2.5 ms.
* @returns Number of decoded samples or @ref opus_errorcodes
*/
OPUS_EXPORT int opus_decoder_dred_decode24(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int32 *pcm, opus_int32 frame_size);
/** Decode audio from an Opus DRED packet with floating point output.
* @param [in] st <tt>OpusDecoder*</tt>: Decoder state
* @param [in] dred <tt>OpusDRED*</tt>: DRED state

View file

@ -819,7 +819,7 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data,
#ifdef FIXED_POINT
#ifdef ENABLE_RES24
int opus_decode(OpusDecoder *st, const unsigned char *data,
opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
{
VARDECL(opus_res, out);
int ret, i;
@ -851,14 +851,59 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
RESTORE_STACK;
return ret;
}
#else
int opus_decode(OpusDecoder *st, const unsigned char *data,
opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
/* 24-bit decode entry point for this build configuration: the caller's
   opus_int32 buffer is handed to opus_decode_native() directly, with no
   intermediate buffer or per-sample conversion.
   Returns OPUS_BAD_ARG when frame_size is not positive, otherwise whatever
   opus_decode_native() returns. */
int opus_decode24(OpusDecoder *st, const unsigned char *data,
      opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
{
   if (frame_size > 0)
   {
      return opus_decode_native(st, data, len, pcm, frame_size, decode_fec,
            0, NULL, 0, NULL, 0);
   }
   return OPUS_BAD_ARG;
}
#else
/* 16-bit decode entry point for this build configuration: the caller's
   opus_int16 buffer is handed to opus_decode_native() directly, with no
   intermediate buffer or per-sample conversion.
   Returns OPUS_BAD_ARG when frame_size is not positive, otherwise whatever
   opus_decode_native() returns. */
int opus_decode(OpusDecoder *st, const unsigned char *data,
      opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec)
{
   if (frame_size > 0)
   {
      return opus_decode_native(st, data, len, pcm, frame_size, decode_fec,
            0, NULL, 0, NULL, 0);
   }
   return OPUS_BAD_ARG;
}
/* 24-bit decode entry point that goes through a temporary opus_res buffer:
   the packet is decoded into `out`, then every sample is converted with
   RES2INT24() into the caller's opus_int32 buffer.
   Returns the value of opus_decode_native() (decoded samples per channel
   when positive), OPUS_BAD_ARG when frame_size is not positive, or
   OPUS_INVALID_PACKET when the packet's sample count cannot be obtained. */
int opus_decode24(OpusDecoder *st, const unsigned char *data,
      opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
{
   VARDECL(opus_res, out);
   int ret, i;
   int nb_samples;
   ALLOC_STACK;

   if(frame_size<=0)
   {
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }
   if (data != NULL && len > 0 && !decode_fec)
   {
      /* Regular decode of a real packet: cap frame_size at the packet's own
         sample count so the temporary buffer is no larger than needed. */
      nb_samples = opus_decoder_get_nb_samples(st, data, len);
      if (nb_samples>0)
         frame_size = IMIN(frame_size, nb_samples);
      else
      {
         /* Pair ALLOC_STACK with RESTORE_STACK here, as on every other exit. */
         RESTORE_STACK;
         return OPUS_INVALID_PACKET;
      }
   }
   celt_assert(st->channels == 1 || st->channels == 2);
   ALLOC(out, frame_size*st->channels, opus_res);

   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
   if (ret > 0)
   {
      /* ret counts samples per channel; the buffers are interleaved. */
      for (i=0;i<ret*st->channels;i++)
         pcm[i] = RES2INT24(out[i]);
   }
   RESTORE_STACK;
   return ret;
}
#endif
#ifndef DISABLE_FLOAT_API
@ -934,6 +979,41 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
return ret;
}
/* 24-bit decode entry point for the float build: the packet is decoded into
   a temporary float buffer, then every sample is converted with RES2INT24()
   into the caller's opus_int32 buffer.
   Returns the value of opus_decode_native() (decoded samples per channel
   when positive), OPUS_BAD_ARG when frame_size is not positive, or
   OPUS_INVALID_PACKET when the packet's sample count cannot be obtained. */
int opus_decode24(OpusDecoder *st, const unsigned char *data,
      opus_int32 len, opus_int32 *pcm, int frame_size, int decode_fec)
{
   VARDECL(float, out);
   int ret, i;
   int nb_samples;
   ALLOC_STACK;

   if(frame_size<=0)
   {
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }
   if (data != NULL && len > 0 && !decode_fec)
   {
      /* Regular decode of a real packet: cap frame_size at the packet's own
         sample count so the temporary buffer is no larger than needed. */
      nb_samples = opus_decoder_get_nb_samples(st, data, len);
      if (nb_samples>0)
         frame_size = IMIN(frame_size, nb_samples);
      else
      {
         /* Pair ALLOC_STACK with RESTORE_STACK here, as on every other exit. */
         RESTORE_STACK;
         return OPUS_INVALID_PACKET;
      }
   }
   celt_assert(st->channels == 1 || st->channels == 2);
   ALLOC(out, frame_size*st->channels, float);

   /* NOTE(review): the ninth argument is 1 here but 0 in the opus_res-based
      opus_decode24() variant -- confirm the difference is intended. */
   ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1, NULL, 0);
   if (ret > 0)
   {
      /* ret counts samples per channel; the buffers are interleaved. */
      for (i=0;i<ret*st->channels;i++)
         pcm[i] = RES2INT24(out[i]);
   }
   RESTORE_STACK;
   return ret;
}
int opus_decode_float(OpusDecoder *st, const unsigned char *data,
opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec)
{
@ -1498,7 +1578,41 @@ int opus_decoder_dred_decode(OpusDecoder *st, const OpusDRED *dred, opus_int32 d
if (ret > 0)
{
for (i=0;i<ret*st->channels;i++)
pcm[i] = FLOAT2INT16(out[i]);
pcm[i] = RES2INT16(out[i]);
}
RESTORE_STACK;
return ret;
#else
(void)st;
(void)dred;
(void)dred_offset;
(void)pcm;
(void)frame_size;
return OPUS_UNIMPLEMENTED;
#endif
}
int opus_decoder_dred_decode24(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int32 *pcm, opus_int32 frame_size)
{
#ifdef ENABLE_DRED
VARDECL(float, out);
int ret, i;
ALLOC_STACK;
if(frame_size<=0)
{
RESTORE_STACK;
return OPUS_BAD_ARG;
}
celt_assert(st->channels == 1 || st->channels == 2);
ALLOC(out, frame_size*st->channels, float);
ret = opus_decode_native(st, NULL, 0, out, frame_size, 0, 0, NULL, 1, dred, dred_offset);
if (ret > 0)
{
for (i=0;i<ret*st->channels;i++)
pcm[i] = RES2INT24(out[i]);
}
RESTORE_STACK;
return ret;

View file

@ -739,6 +739,29 @@ void downmix_int(const void *_x, opus_val32 *y, int subframe, int offset, int c1
}
}
/* Downmix helper for interleaved opus_int32 input.
   For each of the `subframe` output samples, y[] first receives channel c1
   (converted with INT24TOSIG()). Then:
     - c2 >= 0  : channel c2 is added on top;
     - c2 == -2 : channels 1..C-1 are all added on top;
     - otherwise: nothing more is added.
   Input samples are read starting `offset` frames into the buffer, with C
   interleaved channels per frame. */
void downmix_int24(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C)
{
   const opus_int32 *pcm = (const opus_int32 *)_x;
   int n;

   /* Seed the output with the first requested channel. */
   for (n=0;n<subframe;n++)
      y[n] = INT24TOSIG(pcm[(n+offset)*C+c1]);

   if (c2>-1)
   {
      /* A specific second channel was requested. */
      for (n=0;n<subframe;n++)
         y[n] += INT24TOSIG(pcm[(n+offset)*C+c2]);
      return;
   }
   if (c2!=-2)
      return;

   /* c2 == -2: accumulate every channel from index 1 upwards. */
   {
      int ch;
      for (ch=1;ch<C;ch++)
      {
         for (n=0;n<subframe;n++)
            y[n] += INT24TOSIG(pcm[(n+offset)*C+ch]);
      }
   }
}
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
{
int new_size;
@ -2534,6 +2557,16 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram
RESTORE_STACK;
return ret;
}
/* 24-bit encode entry point for this build configuration: the caller's
   opus_int32 buffer is passed to opus_encode_native() unchanged (twice: once
   with frame_size, once alongside analysis_frame_size and downmix_int24).
   Returns whatever opus_encode_native() returns.
   NOTE(review): the literal 16 is the same value opus_encode() passes for
   16-bit input -- confirm it is also the intended value for 24-bit input. */
opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 max_data_bytes)
{
   const int frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);

   return opus_encode_native(st, pcm, frame_size, data, max_data_bytes, 16,
         pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 0);
}
#else
opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
unsigned char *data, opus_int32 max_data_bytes)
@ -2543,6 +2576,30 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram
return opus_encode_native(st, pcm, frame_size, data, max_data_bytes, 16,
pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
}
/* 24-bit encode entry point that goes through a temporary opus_res buffer:
   each input sample is converted with INT24TORES() and the converted buffer
   is what gets encoded, while the original opus_int32 input is still passed
   alongside analysis_frame_size and downmix_int24.
   Returns OPUS_BAD_ARG when frame_size_select() yields a non-positive size,
   otherwise whatever opus_encode_native() returns.
   NOTE(review): the final argument is 1 here, whereas the other
   opus_encode24() variants pass 0 -- confirm this is intended. */
opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 max_data_bytes)
{
   int k, status;
   int frame_size;
   int total;
   VARDECL(opus_res, in);
   ALLOC_STACK;

   frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
   if (frame_size <= 0)
   {
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }

   /* Interleaved sample count across all channels. */
   total = frame_size*st->channels;
   ALLOC(in, total, opus_res);
   for (k=0;k<total;k++)
      in[k] = INT24TORES(pcm[k]);

   status = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
         pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 1);
   RESTORE_STACK;
   return status;
}
#endif /* ENABLE_RES24 */
#else
@ -2563,12 +2620,37 @@ opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_fram
ALLOC(in, frame_size*st->channels, float);
for (i=0;i<frame_size*st->channels;i++)
in[i] = (1.0f/32768)*pcm[i];
in[i] = INT16TORES(pcm[i]);
ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
RESTORE_STACK;
return ret;
}
/* 24-bit encode entry point for the float build: each input sample is
   converted with INT24TORES() into a temporary float buffer, which is what
   gets encoded, while the original opus_int32 input is still passed
   alongside analysis_frame_size and downmix_int24.
   Returns OPUS_BAD_ARG when frame_size_select() yields a non-positive size,
   otherwise whatever opus_encode_native() returns. */
opus_int32 opus_encode24(OpusEncoder *st, const opus_int32 *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 max_data_bytes)
{
   int k, status;
   int frame_size;
   int total;
   VARDECL(float, in);
   ALLOC_STACK;

   frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
   if (frame_size <= 0)
   {
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }

   /* Interleaved sample count across all channels. */
   total = frame_size*st->channels;
   ALLOC(in, total, float);
   for (k=0;k<total;k++)
      in[k] = INT24TORES(pcm[k]);

   status = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
         pcm, analysis_frame_size, 0, -2, st->channels, downmix_int24, 0);
   RESTORE_STACK;
   return status;
}
opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
unsigned char *data, opus_int32 out_data_bytes)
{

View file

@ -174,6 +174,7 @@ typedef void (*opus_copy_channel_out_func)(
/* Function-pointer type shared by the downmix helpers declared below. */
typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
/* Variant that reads _x as interleaved opus_int32 samples and converts them
   with INT24TOSIG(). */
void downmix_int24(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
int is_digital_silence(const opus_res* pcm, int frame_size, int channels, int lsb_depth);
int encode_size(int size, unsigned char *data);

View file

@ -232,6 +232,20 @@ int test_encode(TestCustomParams params) {
}
}
}
else {
if (params.encoder_bit_depth == 24) {
opus_int32* input = (opus_int32*)inbuf;
len = opus_encode24(enc,
&input[samp_count*num_channels],
frame_size,
packet,
MAX_PACKET);
if (len <= 0) {
fprintf(stderr, "opus_encode24() failed: %s\n", opus_strerror(len));
ret = -1;
break;
}
}
else {
opus_int16* input = (opus_int16*)inbuf;
len = opus_encode(enc,
@ -246,6 +260,7 @@ int test_encode(TestCustomParams params) {
}
}
}
}
/* Generate bit/byte errors and check that nothing bad happens. */
{
@ -355,6 +370,22 @@ int test_encode(TestCustomParams params) {
}
}
}
else {
if (params.decoder_bit_depth == 24) {
opus_int32* output = (opus_int32*)outbuf;
samples_decoded = opus_decode24(dec,
packet,
len,
&output[samp_count*num_channels],
frame_size,
0);
if (samples_decoded != frame_size) {
fprintf(stderr, "opus_decode24() returned %d\n", samples_decoded);
ret = -1;
break;
}
}
else {
opus_int16* output = (opus_int16*)outbuf;
samples_decoded = opus_decode(dec,
@ -370,13 +401,19 @@ int test_encode(TestCustomParams params) {
}
}
}
}
samp_count += frame_size;
} while (samp_count + frame_size <= input_samples);
#ifdef RESYNTH
/* Resynth only works with OpusCustom encoder */
if (params.custom_encode && params.custom_decode) {
/* Resynth only works with OpusCustom encoder. Also, we don't enable it if there's
a 16-bit bottleneck in the decoder that can cause clipping. */
if (params.custom_encode && (params.custom_decode
#if !defined(FIXED_POINT) || defined(ENABLE_RES24)
|| params.decoder_bit_depth > 16
#endif
)) {
if (params.float_encode) {
float* input = (float*)inbuf;
float* output = (float*)outbuf;
@ -552,18 +589,8 @@ void test_opus_custom(const int num_encoders, const int num_setting_changes) {
params.float_encode = 0;
params.float_decode = 0;
#endif
if (params.custom_encode) {
params.encoder_bit_depth = RAND_SAMPLE(encoder_bit_depths);
}
else {
params.encoder_bit_depth = 16;
}
if (params.custom_decode) {
params.decoder_bit_depth = RAND_SAMPLE(decoder_bit_depths);
}
else {
params.decoder_bit_depth = 16;
}
#ifdef RESYNTH
/* Resynth logic works best when encoder/decoder use same datatype */
params.float_decode = params.float_encode;