Switch example tools to use VBR and 960-sample frames by default, on the basis that a user without any particular requirements probably wants this.

Minor change in the VBR behavior to hot-start with some internal state
parameters which were observed to be quite consistent across bitrates,
frame sizes, and content. This also prevents it from completely burning
the reserve capacity on the first frame if it's a short.

Also switch some maximum frame sizes to match the OPUS draft maximums.
This commit is contained in:
Gregory Maxwell 2010-10-28 03:52:21 -04:00 committed by Jean-Marc Valin
parent eedb42282a
commit a9411472cd
3 changed files with 33 additions and 34 deletions

View file

@ -67,7 +67,7 @@ struct CELTEncoder {
int complexity; int complexity;
int start, end; int start, end;
celt_int32 vbr_rate_norm; /* Target number of 16th bits per frame */ celt_int32 vbr_rate_norm; /* Target number of 8th bits per frame */
/* Everything beyond this point gets cleared on a reset */ /* Everything beyond this point gets cleared on a reset */
#define ENCODER_RESET_START frame_max #define ENCODER_RESET_START frame_max
@ -132,6 +132,7 @@ CELTEncoder *celt_encoder_init(CELTEncoder *st, const CELTMode *mode, int channe
st->end = st->mode->effEBands; st->end = st->mode->effEBands;
st->vbr_rate_norm = 0; st->vbr_rate_norm = 0;
st->vbr_offset = -140<<BITRES;
st->force_intra = 0; st->force_intra = 0;
st->delayedIntra = 1; st->delayedIntra = 1;
st->tonal_average = 256; st->tonal_average = 256;
@ -794,7 +795,7 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
{ {
celt_word16 alpha; celt_word16 alpha;
celt_int32 delta; celt_int32 delta;
/* The target rate in 16th bits per frame */ /* The target rate in 8th bits per frame */
celt_int32 vbr_rate; celt_int32 vbr_rate;
celt_int32 target; celt_int32 target;
celt_int32 vbr_bound, max_allowed; celt_int32 vbr_bound, max_allowed;
@ -819,18 +820,16 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, c
else if (M > 1) else if (M > 1)
target-=(target+14)/28; target-=(target+14)/28;
/* The average energy is removed from the target and the actual /* The current offset is removed from the target and the space used
energy added*/ so far is added*/
target=target+st->vbr_offset-(50<<BITRES)+ec_enc_tell(enc, BITRES); target=target+st->vbr_offset+ec_enc_tell(enc, BITRES);
/* In VBR mode the frame size must not be reduced so much that it would result in the coarse energy busting its budget */ /* In VBR mode the frame size must not be reduced so much that it would result in the coarse energy busting its budget */
target=IMIN(nbAvailableBytes<<(BITRES+3),target); target=IMIN(nbAvailableBytes<<(BITRES+3),target);
/* Make the adaptation coef (alpha) higher at the beginning */ if (st->vbr_count < 970)
if (st->vbr_count < 990)
{ {
st->vbr_count++; st->vbr_count++;
alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+10),16)); alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16));
/*printf ("%d %d\n", st->vbr_count+10, alpha);*/
} else } else
alpha = QCONST16(.001f,15); alpha = QCONST16(.001f,15);

View file

@ -41,7 +41,7 @@
#include <math.h> #include <math.h>
#include <string.h> #include <string.h>
#define MAX_PACKET 1024 #define MAX_PACKET 1275
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {

View file

@ -1,6 +1,6 @@
/* Copyright (c) 2002-2007 Jean-Marc Valin /* Copyright (c) 2002-2010 Jean-Marc Valin
Copyright (c) 2007-2009 Xiph.Org Foundation Copyright (c) 2007-2010 Xiph.Org Foundation
Copyright (c) 2008-2009 Gregory Maxwell Copyright (c) 2008-2010 Gregory Maxwell
File: celtenc.c File: celtenc.c
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -81,7 +81,7 @@ int oe_write_page(ogg_page *page, FILE *fp)
} }
#define MAX_FRAME_SIZE 2048 #define MAX_FRAME_SIZE 2048
#define MAX_FRAME_BYTES 1024 #define MAX_FRAME_BYTES 1275
#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */ #define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */
@ -215,9 +215,9 @@ void usage(void)
printf ("\n"); printf ("\n");
printf ("Options:\n"); printf ("Options:\n");
printf (" --bitrate n Encoding bit-rate in kbit/sec\n"); printf (" --bitrate n Encoding bit-rate in kbit/sec\n");
printf (" --vbr Use variable bitrate encoding\n"); printf (" --cbr Use constant bitrate encoding\n");
printf (" --comp n Encoding complexity (0-10)\n"); printf (" --comp n Encoding complexity (0-10)\n");
printf (" --framesize n Frame size (Default: 256)\n"); printf (" --framesize n Frame size (Default: 960)\n");
printf (" --noltp Do not use long-term prediction\n"); printf (" --noltp Do not use long-term prediction\n");
printf (" --independent Encode frames independently (implies noltp)\n"); printf (" --independent Encode frames independently (implies noltp)\n");
printf (" --skeleton Outputs ogg skeleton metadata (may cause incompatibilities)\n"); printf (" --skeleton Outputs ogg skeleton metadata (may cause incompatibilities)\n");
@ -236,7 +236,7 @@ void usage(void)
printf (" --be Raw input is big-endian\n"); printf (" --be Raw input is big-endian\n");
printf (" --8bit Raw input is 8-bit unsigned\n"); printf (" --8bit Raw input is 8-bit unsigned\n");
printf (" --16bit Raw input is 16-bit signed\n"); printf (" --16bit Raw input is 16-bit signed\n");
printf ("Default raw PCM input is 16-bit, little-endian, mono\n"); printf ("Default raw PCM input is 48kHz, 16-bit, little-endian, stereo\n");
} }
@ -248,20 +248,20 @@ int main(int argc, char **argv)
char *inFile, *outFile; char *inFile, *outFile;
FILE *fin, *fout; FILE *fin, *fout;
short input[MAX_FRAME_SIZE]; short input[MAX_FRAME_SIZE];
celt_int32 frame_size = 256; celt_int32 frame_size = 960;
int quiet=0; int quiet=0;
int nbBytes; int nbBytes;
CELTMode *mode; CELTMode *mode;
void *st; void *st;
unsigned char bits[MAX_FRAME_BYTES]; unsigned char bits[MAX_FRAME_BYTES];
int with_vbr = 0; int with_cbr = 0;
int with_skeleton = 0; int with_skeleton = 0;
int total_bytes = 0; int total_bytes = 0;
int peak_bytes = 0; int peak_bytes = 0;
struct option long_options[] = struct option long_options[] =
{ {
{"bitrate", required_argument, NULL, 0}, {"bitrate", required_argument, NULL, 0},
{"vbr",no_argument,NULL, 0}, {"cbr",no_argument,NULL, 0},
{"comp", required_argument, NULL, 0}, {"comp", required_argument, NULL, 0},
{"noltp", no_argument, NULL, 0}, {"noltp", no_argument, NULL, 0},
{"independent", no_argument, NULL, 0}, {"independent", no_argument, NULL, 0},
@ -284,7 +284,7 @@ int main(int argc, char **argv)
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
int print_bitrate=0; int print_bitrate=0;
celt_int32 rate=44100; celt_int32 rate=48000;
celt_int32 size; celt_int32 size;
int chan=1; int chan=1;
int fmt=16; int fmt=16;
@ -305,7 +305,7 @@ int main(int argc, char **argv)
char first_bytes[12]; char first_bytes[12];
int wave_input=0; int wave_input=0;
celt_int32 lookahead = 0; celt_int32 lookahead = 0;
int bytes_per_packet=48; int bytes_per_packet=-1;
int complexity=-127; int complexity=-127;
int prediction=2; int prediction=2;
int bitstream; int bitstream;
@ -325,9 +325,9 @@ int main(int argc, char **argv)
if (strcmp(long_options[option_index].name,"bitrate")==0) if (strcmp(long_options[option_index].name,"bitrate")==0)
{ {
bitrate = atof (optarg); bitrate = atof (optarg);
} else if (strcmp(long_options[option_index].name,"vbr")==0) } else if (strcmp(long_options[option_index].name,"cbr")==0)
{ {
with_vbr=1; with_cbr=1;
} else if (strcmp(long_options[option_index].name,"skeleton")==0) } else if (strcmp(long_options[option_index].name,"skeleton")==0)
{ {
with_skeleton=1; with_skeleton=1;
@ -483,12 +483,12 @@ int main(int argc, char **argv)
fprintf (stderr, "Warning: Requested bitrate (%0.3fkbit/sec) is too high. Setting CELT to %d bytes/frame.\n",bitrate,MAX_FRAME_BYTES); fprintf (stderr, "Warning: Requested bitrate (%0.3fkbit/sec) is too high. Setting CELT to %d bytes/frame.\n",bitrate,MAX_FRAME_BYTES);
} }
if (with_vbr) if (with_cbr)
{ {
bitrate = ((rate/(float)frame_size)*8*bytes_per_packet)/1000.0;
} else {
/*In VBR mode the bytes_per_packet argument becomes a hard maximum. 3x the average rate is just a random choice.*/ /*In VBR mode the bytes_per_packet argument becomes a hard maximum. 3x the average rate is just a random choice.*/
bytes_per_packet=IMIN(bytes_per_packet*3,MAX_FRAME_BYTES); bytes_per_packet=IMIN(bytes_per_packet*3,MAX_FRAME_BYTES);
} else {
bitrate = ((rate/(float)frame_size)*8*bytes_per_packet)/1000.0;
} }
mode = celt_mode_create(rate, frame_size, NULL); mode = celt_mode_create(rate, frame_size, NULL);
@ -510,18 +510,18 @@ int main(int argc, char **argv)
if (chan==2) if (chan==2)
st_string="stereo"; st_string="stereo";
if (!quiet) if (!quiet)
if (with_vbr) if (with_cbr)
fprintf (stderr, "Encoding %d Hz %s audio in %d sample packets at %0.3fkbit/sec (%d maximum bytes per packet) with bitstream version %d\n", fprintf (stderr, "Encoding %.0f kHz %s audio in %.0fms packets at %0.3fkbit/sec (%d bytes per packet, CBR) with bitstream version %d\n",
header.sample_rate, st_string, frame_size, bitrate, bytes_per_packet,bitstream); header.sample_rate/1000., st_string, frame_size/(float)header.sample_rate*1000., bitrate, bytes_per_packet,bitstream);
else else
fprintf (stderr, "Encoding %d Hz %s audio in %d sample packets at %0.3fkbit/sec (%d bytes per packet) with bitstream version %d\n", fprintf (stderr, "Encoding %.0f kHz %s audio in %.0fms packets at %0.3fkbit/sec (%d bytes per packet maximum) with bitstream version %d\n",
header.sample_rate, st_string, frame_size, bitrate, bytes_per_packet,bitstream); header.sample_rate/1000., st_string, frame_size/(float)header.sample_rate*1000., bitrate, bytes_per_packet,bitstream);
} }
/*Initialize CELT encoder*/ /*Initialize CELT encoder*/
st = celt_encoder_create(mode, chan, NULL); st = celt_encoder_create(mode, chan, NULL);
if (with_vbr) if (!with_cbr)
{ {
int tmp = (bitrate*1000); int tmp = (bitrate*1000);
if (celt_encoder_ctl(st, CELT_SET_VBR_RATE(tmp)) != CELT_OK) if (celt_encoder_ctl(st, CELT_SET_VBR_RATE(tmp)) != CELT_OK)
@ -731,7 +731,7 @@ int main(int argc, char **argv)
bytes_written += ret; bytes_written += ret;
} }
if (with_vbr && !quiet) if (!with_cbr && !quiet)
fprintf (stderr, "Average rate %0.3fkbit/sec, %d peak bytes per packet\n", (total_bytes*8.0/((float)nb_encoded/header.sample_rate))/1000.0, peak_bytes); fprintf (stderr, "Average rate %0.3fkbit/sec, %d peak bytes per packet\n", (total_bytes*8.0/((float)nb_encoded/header.sample_rate))/1000.0, peak_bytes);
celt_encoder_destroy(st); celt_encoder_destroy(st);