diff --git a/libcelt/celt.c b/libcelt/celt.c index b8283845..9aaaa6eb 100644 --- a/libcelt/celt.c +++ b/libcelt/celt.c @@ -789,6 +789,7 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i VARDECL(int, fine_quant); VARDECL(celt_word16, error); VARDECL(int, pulses); + VARDECL(int, cap); VARDECL(int, offsets); VARDECL(int, fine_priority); VARDECL(int, tf_res); @@ -1107,8 +1108,12 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); } + ALLOC(cap, st->mode->nbEBands, int); ALLOC(offsets, st->mode->nbEBands, int); + for (i=0;imode->nbEBands;i++) + cap[i] = st->mode->cache.caps[st->mode->nbEBands*(2*LM+C-1)+i] + << C+LM+BITRES-2; for (i=0;imode->nbEBands;i++) offsets[i] = 0; /* Dynamic allocation code */ @@ -1154,7 +1159,7 @@ int celt_encode_with_ec_float(CELTEncoder * restrict st, const celt_sig * pcm, i dynalloc_loop_logp = dynalloc_logp; boost = 0; for (j = 0; tell+(dynalloc_loop_logp<=2&&bits>=(LM+2<mode, st->start, st->end, offsets, + codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands); st->lastCodedBands = codedBands; @@ -1953,6 +1958,7 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da VARDECL(celt_ener, bandE); VARDECL(int, fine_quant); VARDECL(int, pulses); + VARDECL(int, cap); VARDECL(int, offsets); VARDECL(int, fine_priority); VARDECL(int, tf_res); @@ -2107,9 +2113,14 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da spread_decision = ec_dec_icdf(dec, spread_icdf, 5); ALLOC(pulses, st->mode->nbEBands, int); + ALLOC(cap, st->mode->nbEBands, int); ALLOC(offsets, st->mode->nbEBands, int); ALLOC(fine_priority, st->mode->nbEBands, int); + for (i=0;imode->nbEBands;i++) + cap[i] = st->mode->cache.caps[st->mode->nbEBands*(2*LM+C-1)+i] + << C+LM+BITRES-2; + dynalloc_logp = 6; total_bits<<=BITRES; tell = ec_dec_tell(dec, BITRES); @@ -2124,8 +2135,7 @@ int celt_decode_with_ec_float(CELTDecoder * restrict st, const unsigned char *da quanta = IMIN(width<=2&&bits>=(LM+2<mode, st->start, st->end, offsets, + codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0); diff --git a/libcelt/dump_modes.c b/libcelt/dump_modes.c index 84b0b71a..14564704 100644 --- a/libcelt/dump_modes.c +++ b/libcelt/dump_modes.c @@ -129,6 +129,11 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes) for (j=0;jcache.size;j++) fprintf (file, "%d, ", mode->cache.bits[j]); fprintf (file, "};\n"); + fprintf (file, "static const unsigned char cache_caps%d[%d] = {\n", mode->Fs/mdctSize, (mode->maxLM+1)*2*mode->nbEBands); + for (j=0;j<(mode->maxLM+1)*2*mode->nbEBands;j++) + fprintf (file, "%d, ", mode->cache.caps[j]); + fprintf (file, "};\n"); + fprintf(file, "#endif\n"); fprintf(file, "\n"); @@ -226,8 +231,8 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes) fprintf(file, "%d,\t/* nbShortMdcts */\n", mode->nbShortMdcts); fprintf(file, "%d,\t/* shortMdctSize */\n", mode->shortMdctSize); fprintf(file, "logN%d,\t/* logN */\n", framerate); - fprintf(file, "{%d, cache_index%d, cache_bits%d},\t/* cache */\n", - mode->cache.size, mode->Fs/mdctSize, mode->Fs/mdctSize); + fprintf(file, "{%d, cache_index%d, cache_bits%d, cache_caps%d},\t/* cache */\n", + mode->cache.size, mode->Fs/mdctSize, mode->Fs/mdctSize, mode->Fs/mdctSize); fprintf(file, "};\n"); } fprintf(file, "\n"); diff --git a/libcelt/modes.c b/libcelt/modes.c index 5f801ccc..42acdb1a 100644 --- a/libcelt/modes.c +++ b/libcelt/modes.c @@ -432,6 +432,7 @@ void celt_mode_destroy(CELTMode *mode) celt_free((celt_int16*)mode->cache.index); celt_free((unsigned char*)mode->cache.bits); + celt_free((unsigned char*)mode->cache.caps); clt_mdct_clear(&mode->mdct); celt_free((CELTMode *)mode); diff --git a/libcelt/modes.h b/libcelt/modes.h index e4fc4603..4757523c 100644 --- a/libcelt/modes.h +++ b/libcelt/modes.h @@ -71,6 +71,7 @@ typedef struct { int size; const celt_int16 *index; const unsigned char *bits; + const unsigned char *caps; } PulseCache; /** Mode definition (opaque) diff --git a/libcelt/rate.c b/libcelt/rate.c index 63cfd22a..1770ba8a 100644 --- a/libcelt/rate.c +++ b/libcelt/rate.c @@ -77,7 +77,9 @@ static int fits_in32(int _n, int _k) void compute_pulse_cache(CELTMode *m, int LM) { + int C; int i; + int j; int curr=0; int nbEntries=0; int entryN[100], entryK[100], entryI[100]; @@ -85,6 +87,7 @@ void compute_pulse_cache(CELTMode *m, int LM) PulseCache *cache = &m->cache; celt_int16 *cindex; unsigned char *bits; + unsigned char *cap; cindex = celt_alloc(sizeof(cache->index[0])*m->nbEBands*(LM+2)); cache->index = cindex; @@ -92,7 +95,6 @@ void compute_pulse_cache(CELTMode *m, int LM) /* Scan for all unique band sizes */ for (i=0;i<=LM+1;i++) { - int j; for (j=0;jnbEBands;j++) { int k; @@ -133,7 +135,6 @@ void compute_pulse_cache(CELTMode *m, int LM) /* Compute the cache for all unique sizes */ for (i=0;icaps = cap = celt_alloc(sizeof(cache->caps[0])*(LM+1)*2*m->nbEBands); + for (i=0;i<=LM;i++) + { + for (C=1;C<=2;C++) + { + int shift; + shift = C+i+BITRES-2; + for (j=0;jnbEBands;j++) + { + int N0; + int max_bits; + int rmask; + N0 = m->eBands[j+1]-m->eBands[j]; + rmask = N0==1 ? (1< 4 && !(N0&1)) + { + N0>>=1; + LM0--; + } + /* N0=1 and N0=2 bands can't be split down to N=2. */ + else if (N0 <= 2) + { + LM0=IMIN(i,3-N0); + N0<<=LM0; + } + /* Compute the cost for the lowest-level PVQ of a fully split + band. */ + pcache = bits + cindex[(LM0+1)*m->nbEBands+j]; + max_bits = pcache[pcache[0]]+1; + /* Add in the cost of coding regular splits. */ + N = N0; + for(k=0;klogN[j]+(LM0+k<>1)-QTHETA_OFFSET; + /* The number of qtheta bits we'll allocate if the remainder + is to be max_bits. */ + num=(celt_int32)((2*N-1)*offset+max_bits)<<9; + den=((celt_int32)(2*N-1)<<9)-495; + qb = IMIN((num+(den>>1))/den, 8<= 0); + /* The average cost for theta when qn==256 is + 7.73246 bits for the triangular PDF. */ + max_bits += qb*495+256>>9; + N <<= 1; + } + /* Add in the cost of a stereo split, if necessary. */ + if (C==2) + { + max_bits <<= 1; + offset = (m->logN[j]+(i<>1)-QTHETA_OFFSET_STEREO; + ndof = 2*N-1-(N==2); + num = (celt_int32)(max_bits+ndof*offset)<<7; + den = ((celt_int32)ndof<<7)-(N==2?128:125); + qb = IMIN((num+(den>>1))/den, 8<= 0); + /* The average cost for theta when qn==256, N>2 is + 7.8174 bits for the step PDF. */ + max_bits += N==2 ? qb : (qb*125+64>>7); + } + /* Add the fine bits we'll use. */ + /* Compensate for the extra DoF in stereo */ + ndof = C*N + ((C==2 && N>2) ? 1 : 0); + /* Offset the number of fine bits by log2(N)/2 + FINE_OFFSET + compared to their "fair share" of total/N */ + offset = (m->logN[j] + (i<>1)-FINE_OFFSET; + /* N=2 is the only point that doesn't match the curve */ + if (N==2) + offset += 1<>2; + /* The number of fine bits we'll allocate if the remainder is + to be max_bits. */ + num = max_bits+ndof*offset; + den = ndof-1<>1))/den, MAX_FINE_BITS); + celt_assert(qb >= 0); + max_bits += C*qb<>shift < 256); + *cap++ = (unsigned char)(max_bits+rmask>>shift); + } + } + } } #endif /* !STATIC_MODES */ @@ -149,7 +256,7 @@ void compute_pulse_cache(CELTMode *m, int LM) #define ALLOC_STEPS 6 static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start, - const int *bits1, const int *bits2, const int *thresh, int total, int skip_rsv, + const int *bits1, const int *bits2, const int *thresh, const int *cap, int total, int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits, int *ebits, int *fine_priority, int _C, int LM, void *ec, int encode, int prev) { @@ -184,7 +291,7 @@ static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int { done = 1; /* Don't allocate more than we can actually use */ - psum += IMIN(tmp, 64*C<= alloc_floor) psum += alloc_floor; @@ -210,7 +317,7 @@ static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int } else done = 1; /* Don't allocate more than we can actually use */ - tmp = IMIN(tmp, 64*C<= 0); celt_assert(ebits[j] >= 0); @@ -405,7 +521,7 @@ static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int return codedBands; } -int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, int alloc_trim, int *intensity, int *dual_stereo, +int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, int total, int *pulses, int *ebits, int *fine_priority, int _C, int LM, void *ec, int encode, int prev) { int lo, hi, len, j; @@ -476,7 +592,7 @@ int compute_allocation(const CELTMode *m, int start, int end, const int *offsets { done = 1; /* Don't allocate more than we can actually use */ - psum += IMIN(bits1[j], 64*C<= C<