Multiplier-free entropy coder
This commit is contained in:
parent
aeb4467d7f
commit
2ec8d9e522
26 changed files with 1597 additions and 2 deletions
|
@ -5,7 +5,7 @@
|
|||
AUTOMAKE_OPTIONS = 1.6
|
||||
|
||||
#Fools KDevelop into including all files
|
||||
SUBDIRS = libcelt
|
||||
SUBDIRS = libcelt libentcode
|
||||
|
||||
rpm: dist
|
||||
rpmbuild -ta ${PACKAGE}-${VERSION}.tar.gz
|
||||
|
|
|
@ -121,7 +121,7 @@ fi
|
|||
AC_SUBST(SIZE16)
|
||||
AC_SUBST(SIZE32)
|
||||
|
||||
AC_OUTPUT([Makefile libcelt/Makefile])
|
||||
AC_OUTPUT([Makefile libcelt/Makefile] libentcode/Makefile)
|
||||
|
||||
if test "x$src" = "x"; then
|
||||
echo "**IMPORTANT**"
|
||||
|
|
10
libentcode/Makefile.am
Normal file
10
libentcode/Makefile.am
Normal file
|
@ -0,0 +1,10 @@
|
|||
INCLUDES =
|
||||
METASOURCES = AUTO
|
||||
lib_LTLIBRARIES = libentcode.la
|
||||
libentcode_la_SOURCES = bitrdec.c bitree.c bitrenc.c ecintrin.h entcode.c \
|
||||
entdec.c entenc.c mfrngdec.c mfrngenc.c probdec.c probenc.c probmod.c
|
||||
bin_PROGRAMS = ectest
|
||||
ectest_SOURCES = ectest.c
|
||||
ectest_LDADD = $(top_builddir)/libentcode/libentcode.la
|
||||
noinst_HEADERS = bitrdec.h bitree.h bitrenc.h ecintrin.h entcode.h entdec.h \
|
||||
entenc.h mfrngcod.h probdec.h probenc.h probmod.h
|
24
libentcode/bitrdec.c
Normal file
24
libentcode/bitrdec.c
Normal file
|
@ -0,0 +1,24 @@
|
|||
#include "bitree.h"
|
||||
|
||||
int ec_bitree_find_and_update(unsigned *_this,int _sz,int _split,
|
||||
unsigned _freq,unsigned *_fl,int _val){
|
||||
int base;
|
||||
int test;
|
||||
int fl;
|
||||
base=-1;
|
||||
fl=0;
|
||||
while(_split>0){
|
||||
test=base+_split;
|
||||
if(test<_sz){
|
||||
if(_freq>=_this[test]){
|
||||
_freq-=_this[test];
|
||||
fl+=_this[test];
|
||||
base=test;
|
||||
}
|
||||
else _this[test]+=_val;
|
||||
}
|
||||
_split>>=1;
|
||||
}
|
||||
*_fl=fl;
|
||||
return base+1;
|
||||
}
|
22
libentcode/bitrdec.h
Normal file
22
libentcode/bitrdec.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
#if !defined(_bitrenc_H)
|
||||
# define _bitredec_H (1)
|
||||
# include "bitree.h"
|
||||
|
||||
/*Decoder-specific functions for Binary Indexed Trees.
|
||||
See bitree.h for more detailed documentation.*/
|
||||
|
||||
/*Gets the symbol that corresponds with a given frequency.
|
||||
This is an omnibus function that also computes the cumulative frequency of
|
||||
the symbols before the one returned, and updates the count of that symbol by
|
||||
the given amount.
|
||||
_sz: The size of the table.
|
||||
_split: The largest power of two less than OR EQUAL to the table size.
|
||||
_freq: A frequency in the range of one of the symbols in the alphabet.
|
||||
_fl: Returns the sum of the frequencies of the symbols less than that of
|
||||
the returned symbol.
|
||||
_val: The amount to add to returned symbol's frequency.
|
||||
Return: The smallest symbol whose cumulative frequency is greater than freq.*/
|
||||
int ec_bitree_find_and_update(unsigned *_this,int _sz,int _split,
|
||||
unsigned _freq,unsigned *_fl,int _val);
|
||||
|
||||
#endif
|
101
libentcode/bitree.c
Normal file
101
libentcode/bitree.c
Normal file
|
@ -0,0 +1,101 @@
|
|||
#include "bitree.h"
|
||||
|
||||
void ec_bitree_to_counts(unsigned *_this,int _sz,int _split){
|
||||
int p;
|
||||
int q;
|
||||
int s;
|
||||
for(p=_split;p>1;p=q){
|
||||
q=p>>1;
|
||||
for(s=p-1;s<_sz;s+=p)_this[s]-=_this[s-q];
|
||||
}
|
||||
}
|
||||
|
||||
void ec_bitree_from_counts(unsigned *_this,int _sz){
|
||||
int p;
|
||||
int q;
|
||||
int s;
|
||||
for(q=1,p=2;p<=_sz;q=p,p=q<<1){
|
||||
for(s=p-1;s<_sz;s+=p)_this[s]+=_this[s-q];
|
||||
}
|
||||
}
|
||||
|
||||
unsigned ec_bitree_get_cumul(const unsigned *_this,int _sym){
|
||||
unsigned ret;
|
||||
ret=0;
|
||||
while(_sym>0){
|
||||
ret+=_this[_sym-1];
|
||||
_sym&=_sym-1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned ec_bitree_get_freq(const unsigned *_this,int _sym){
|
||||
unsigned ret;
|
||||
int p;
|
||||
ret=_this[_sym];
|
||||
p=_sym&_sym+1;
|
||||
while(_sym!=p){
|
||||
ret-=_this[_sym-1];
|
||||
_sym&=_sym-1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*Fenwick's approach to re-scaling the counts.
|
||||
This tests to be twice as slow or more than the one below, even with inline
|
||||
functions enabled, and without loop vectorization (which would make Moffat's
|
||||
approach even faster).*/
|
||||
void ec_bitree_halve(unsigned *_this,int _sz,int _split){
|
||||
int i;
|
||||
for(i=_sz;i-->0;){
|
||||
ec_bitree_update(_this,_sz,i,-(int)(ec_bitree_get_freq(_this,i)>>1));
|
||||
}
|
||||
}
|
||||
#else
|
||||
/*Fenwick mentions that this approach is also possible, and this is what
|
||||
Moffat uses.
|
||||
Simply convert the tree into a simple array of counts, perform the halving,
|
||||
and then convert it back.*/
|
||||
void ec_bitree_halve(unsigned *_this,int _sz,int _split){
|
||||
int i;
|
||||
ec_bitree_to_counts(_this,_sz,_split);
|
||||
/*LOOP VECTORIZES.*/
|
||||
for(i=0;i<_sz;i++)_this[i]-=_this[i]>>1;
|
||||
ec_bitree_from_counts(_this,_sz);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#include <stdio.h>
|
||||
/*Simple regression test code.
|
||||
Compile with bitrenc.c and bitrdec.c as well.*/
|
||||
|
||||
static void ec_bitree_print(unsigned *_this,int _sz){
|
||||
int i;
|
||||
for(i=0;i<_sz;i++)printf("%3i%c",_this[i],i+1<_sz?' ':'\n');
|
||||
}
|
||||
|
||||
int main(void){
|
||||
unsigned t[16]={0,8,4,9,2,10,5,11,1,12,6,13,3,14,7,15};
|
||||
int fl;
|
||||
int s;
|
||||
int i;
|
||||
ec_bitree_print(t,16);
|
||||
ec_bitree_from_counts(t,16);
|
||||
ec_bitree_print(t,16);
|
||||
for(i=0;i<=16;i++)printf("%3i%c",ec_bitree_get_cumul(t,i),i<16?' ':'\n');
|
||||
for(i=0;i<t[15];i++){
|
||||
s=ec_bitree_find_and_update(t,16,16,i,&fl,0);
|
||||
printf("%3i: %i %3i\n",i,s,fl);
|
||||
}
|
||||
for(i=0;i<16;i++){
|
||||
s=ec_bitree_find_and_update(t,16,ec_bitree_get_cumul(t,i),&fl,100);
|
||||
ec_bitree_to_counts(t,16,16);
|
||||
ec_bitree_print(t,16);
|
||||
ec_bitree_from_counts(t,16);
|
||||
ec_bitree_update(t,16,s,-100);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
84
libentcode/bitree.h
Normal file
84
libentcode/bitree.h
Normal file
|
@ -0,0 +1,84 @@
|
|||
/*Implements Binary Indexed Trees for cumulative probability tables, based
|
||||
upon a combination of the techniques described in \cite{Fen93,Fen95,Mof99}.
|
||||
This is a really, amazingly elegant data structure, that maintains
|
||||
cumulative frequency tables in logarithmic time, using exactly the same
|
||||
space as an ordinary frequency table.
|
||||
In addition, the non-cumulative frequency can be retrieved in constant
|
||||
amortized time (under 2 memory references per symbol on average).
|
||||
|
||||
We are dealing primarily with relatively small alphabets and are not sorting
|
||||
symbols by frequency, and so we adopt Fenwick's tree organization strategy.
|
||||
It's complexity has better constant factors, and although it is logarithmic
|
||||
in n (the alphabet size) instead of s (the index of the symbol), the latter
|
||||
is not expected to be appreciably smaller.
|
||||
We modify it however, to remove the special cases surrounding the element 0,
|
||||
which greatly streamlines the code.
|
||||
Our scheme has the added benefit that for alphabet sizes that are a power of
|
||||
2, the last element of the array is the total cumulative frequency count.
|
||||
|
||||
We choose Moffat's approach to halving the entire frequency table, which is
|
||||
over twice as fast in practice as that suggested by Fenwick, even though
|
||||
they have the same number of memory accesses.
|
||||
We also adapt Moffat's suggestion for an omnibus decode function that updates
|
||||
the count of the symbol being decoded while it is searching for it.
|
||||
We also have it retain and return the cumulative frequency count needed to
|
||||
update the arithmetic decoder.
|
||||
|
||||
See bitrenc.h and bitrdec.h for encoding- and decoding-specific functions.
|
||||
|
||||
@TECHREPORT{Fen93,
|
||||
author ="Peter Fenwick",
|
||||
title ="A new data structure for cumulative probability tables",
|
||||
institution="The University of Auckland, Department of Computer Science",
|
||||
year =1993,
|
||||
number =88,
|
||||
month =May,
|
||||
URL ="http://www.cs.auckland.ac.nz/~peter-f/ftplink/TechRep88.ps"
|
||||
}
|
||||
@TECHREPORT{Fen95,
|
||||
author ="Peter Fenwick",
|
||||
title ="A new data structure for cumulative probability tables: an
|
||||
improved frequency to symbol algorithm",
|
||||
institution="The University of Auckland, Department of Computer Science",
|
||||
year =1995,
|
||||
number =110,
|
||||
month =Feb,
|
||||
URL ="http://www.cs.auckland.ac.nz/~peter-f/ftplink/TechRep110.ps"
|
||||
}
|
||||
@ARTICLE{Mof99,
|
||||
author ="Alistair Moffat",
|
||||
title ="An improved data structure for cumulative probability tables",
|
||||
journal ="Software Practice and Experience",
|
||||
volume =29,
|
||||
number =7,
|
||||
pages ="647--659",
|
||||
year =1999
|
||||
}*/
|
||||
#if !defined(_bitree_H)
|
||||
# define _bitree_H (1)
|
||||
|
||||
/*Converts an array of frequency counts to our cumulative tree representation.
|
||||
_sz: The size of the table.*/
|
||||
void ec_bitree_from_counts(unsigned *_this,int _sz);
|
||||
|
||||
/*Converts our cumulative tree representation to an array of frequency counts.
|
||||
_sz: The size of the table.
|
||||
_split: The largest power of two less than OR EQUAL to the table size.*/
|
||||
void ec_bitree_to_counts(unsigned *_this,int _sz,int _split);
|
||||
|
||||
/*Gets the cumulative frequency of the symbols less than the given one.
|
||||
_sym: The symbol to obtain the cumulative frequency for.
|
||||
Return: The sum of the frequencies of the symbols less than _sym.*/
|
||||
unsigned ec_bitree_get_cumul(const unsigned *_this,int _sym);
|
||||
|
||||
/*Gets the frequency of a single symbol.
|
||||
_sym: The symbol to obtain the frequency for.
|
||||
Return: The frequency of _sym.*/
|
||||
unsigned ec_bitree_get_freq(const unsigned *_this,int _sym);
|
||||
|
||||
/*Halves the frequency of each symbol, rounding up.
|
||||
_sz: The size of the table.
|
||||
_split: The largest power of two less than OR EQUAL to the table size.*/
|
||||
void ec_bitree_halve(unsigned *_this,int _sz,int _split);
|
||||
|
||||
#endif
|
9
libentcode/bitrenc.c
Normal file
9
libentcode/bitrenc.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
#include "bitrenc.h"
|
||||
|
||||
void ec_bitree_update(unsigned *_this,int _sz,int _sym,int _val){
|
||||
do{
|
||||
_this[_sym]+=_val;
|
||||
_sym+=_sym+1&-(_sym+1);
|
||||
}
|
||||
while(_sym<_sz);
|
||||
}
|
14
libentcode/bitrenc.h
Normal file
14
libentcode/bitrenc.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#if !defined(_bitrenc_H)
|
||||
# define _bitrenc_H (1)
|
||||
# include "bitree.h"
|
||||
|
||||
/*Encoder-specific functions for Binary Indexed Trees.
|
||||
See bitree.h for more detailed documentation.*/
|
||||
|
||||
/*Updates the frequency of a given symbol.
|
||||
_sz: The size of the table.
|
||||
_sym: The symbol to update.
|
||||
_val: The amount to add to this symbol's frequency.*/
|
||||
void ec_bitree_update(unsigned *_this,int _sz,int _sym,int _val);
|
||||
|
||||
#endif
|
71
libentcode/ecintrin.h
Normal file
71
libentcode/ecintrin.h
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*Some common macros for potential platform-specific optimization.*/
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#if !defined(_ecintrin_H)
|
||||
# define _ecintrin_H (1)
|
||||
|
||||
/*Some specific platforms may have optimized intrinsic or inline assembly
|
||||
versions of these functions which can substantially improve performance.
|
||||
We define macros for them to allow easy incorporation of these non-ANSI
|
||||
features.*/
|
||||
|
||||
/*Note that we do not provide a macro for abs(), because it is provided as a
|
||||
library function, which we assume is translated into an intrinsic to avoid
|
||||
the function call overhead and then implemented in the smartest way for the
|
||||
target platform.
|
||||
With modern gcc (4.x), this is true: it uses cmov instructions if the
|
||||
architecture supports it and branchless bit-twiddling if it does not (the
|
||||
speed difference between the two approaches is not measurable).
|
||||
Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150)
|
||||
by Sun Microsystems, despite prior art dating back to at least 1996:
|
||||
http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT
|
||||
On gcc 3.x, however, our assumption is not true, as abs() is translated to a
|
||||
conditional jump, which is horrible on deeply piplined architectures (e.g.,
|
||||
all consumer architectures for the past decade or more) when the sign cannot
|
||||
be reliably predicted.*/
|
||||
|
||||
/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if
|
||||
given an appropriate architecture, but the branchless bit-twiddling versions
|
||||
are just as fast, and do not require any special target architecture.
|
||||
Earlier gcc versions (3.x) compiled both code to the same assembly
|
||||
instructions, because of the way they represented ((_b)>(_a)) internally.*/
|
||||
#define EC_MAXI(_a,_b) ((_a)-((_a)-(_b)&-((_b)>(_a))))
|
||||
#define EC_MINI(_a,_b) ((_a)+((_b)-(_a)&-((_b)<(_a))))
|
||||
/*This has a chance of compiling branchless, and is just as fast as the
|
||||
bit-twiddling method, which is slightly less portable, since it relies on a
|
||||
sign-extended rightshift, which is not guaranteed by ANSI (but present on
|
||||
every relevant platform).*/
|
||||
#define EC_SIGNI(_a) (((_a)>0)-((_a)<0))
|
||||
/*Slightly more portable than relying on a sign-extended right-shift (which is
|
||||
not guaranteed by ANSI), and just as fast, since gcc (3.x and 4.x both)
|
||||
compile it into the right-shift anyway.*/
|
||||
#define EC_SIGNMASK(_a) (-((_a)<0))
|
||||
/*Clamps an integer into the given range.
|
||||
If _a>_c, then the lower bound _a is respected over the upper bound _c (this
|
||||
behavior is required to meet our documented API behavior).
|
||||
_a: The lower bound.
|
||||
_b: The value to clamp.
|
||||
_c: The upper boud.*/
|
||||
#define EC_CLAMPI(_a,_b,_c) (EC_MAXI(_a,EC_MINI(_b,_c)))
|
||||
/*Count leading zeros.
|
||||
This macro should only be used for implementing ec_ilog(), if it is defined.
|
||||
All other code should use EC_ILOG() instead.*/
|
||||
#if __GNUC_PREREQ(3,4)
|
||||
# if INT_MAX>=2147483647
|
||||
# define EC_CLZ0 sizeof(unsigned)*CHAR_BIT
|
||||
# define EC_CLZ(_x) (__builtin_clz(_x))
|
||||
# elif LONG_MAX>=2147483647L
|
||||
# define EC_CLZ0 sizeof(unsigned long)*CHAR_BIT
|
||||
# define EC_CLZ(_x) (__builtin_clzl(_x))
|
||||
# endif
|
||||
#endif
|
||||
#if defined(EC_CLZ)
|
||||
/*Note that __builtin_clz is not defined when _x==0, according to the gcc
|
||||
documentation (and that of the BSR instruction that implements it on x86),
|
||||
so we have to special-case it.*/
|
||||
# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x)&-!!(_x))
|
||||
#else
|
||||
# define EC_ILOG(_x) (ec_ilog(_x))
|
||||
#endif
|
||||
|
||||
#endif
|
95
libentcode/ectest.c
Normal file
95
libentcode/ectest.c
Normal file
|
@ -0,0 +1,95 @@
|
|||
#include <stdio.h>
|
||||
#include "probenc.h"
|
||||
#include "probdec.h"
|
||||
|
||||
int main(int _argc,char **_argv){
|
||||
ec_byte_buffer buf;
|
||||
ec_enc enc;
|
||||
ec_dec dec;
|
||||
ec_probmod mod;
|
||||
int ft;
|
||||
int ftb;
|
||||
int sym;
|
||||
int sz;
|
||||
int s;
|
||||
int i;
|
||||
/*Testing encoding of raw bit values.*/
|
||||
ec_byte_writeinit(&buf);
|
||||
ec_enc_init(&enc,&buf);
|
||||
for(ft=0;ft<1024;ft++){
|
||||
for(i=0;i<ft;i++){
|
||||
ec_enc_uint(&enc,i,ft);
|
||||
}
|
||||
}
|
||||
/*Testing encoding of raw bit values.*/
|
||||
for(ftb=0;ftb<16;ftb++){
|
||||
for(i=0;i<(1<<ftb);i++){
|
||||
ec_enc_bits(&enc,i,ftb);
|
||||
}
|
||||
}
|
||||
for(sz=1;sz<256;sz++){
|
||||
ec_probmod_init_full(&mod,sz,1,sz+(sz>>1),NULL);
|
||||
for(i=0;i<sz;i++){
|
||||
s=((unsigned)(i*45678901+7))%sz;
|
||||
ec_probmod_write(&mod,&enc,s);
|
||||
}
|
||||
ec_probmod_clear(&mod);
|
||||
}
|
||||
for(sz=11;sz<256;sz++){
|
||||
ec_probmod_init_full(&mod,sz,1,sz+(sz>>1),NULL);
|
||||
for(i=0;i<sz;i++){
|
||||
s=((unsigned)(i*45678901+7))%sz;
|
||||
ec_probmod_write_range(&mod,&enc,s,EC_MAXI(s-5,0),EC_MINI(s+6,sz));
|
||||
}
|
||||
ec_probmod_clear(&mod);
|
||||
}
|
||||
ec_enc_done(&enc);
|
||||
fprintf(stderr,"Encoded to %li bytes.\n",(long)(buf.ptr-buf.buf));
|
||||
ec_byte_readinit(&buf,ec_byte_get_buffer(&buf),ec_byte_bytes(&buf));
|
||||
ec_dec_init(&dec,&buf);
|
||||
for(ft=0;ft<1024;ft++){
|
||||
for(i=0;i<ft;i++){
|
||||
sym=ec_dec_uint(&dec,ft);
|
||||
if(sym!=i){
|
||||
fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for(ftb=0;ftb<16;ftb++){
|
||||
for(i=0;i<(1<<ftb);i++){
|
||||
sym=ec_dec_bits(&dec,ftb);
|
||||
if(sym!=i){
|
||||
fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
for(sz=1;sz<256;sz++){
|
||||
ec_probmod_init_full(&mod,sz,1,sz+(sz>>1),NULL);
|
||||
for(i=0;i<sz;i++){
|
||||
s=((unsigned)(i*45678901+7))%sz;
|
||||
sym=ec_probmod_read(&mod,&dec);
|
||||
if(sym!=s){
|
||||
fprintf(stderr,"Decoded %i instead of %i with sz of %i.\n",sym,s,sz);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
ec_probmod_clear(&mod);
|
||||
}
|
||||
for(sz=11;sz<256;sz++){
|
||||
ec_probmod_init_full(&mod,sz,1,sz+(sz>>1),NULL);
|
||||
for(i=0;i<sz;i++){
|
||||
s=((unsigned)(i*45678901+7))%sz;
|
||||
sym=ec_probmod_read_range(&mod,&dec,EC_MAXI(s-5,0),EC_MINI(s+6,sz));
|
||||
if(sym!=s){
|
||||
fprintf(stderr,"Decoded %i instead of %i with sz of %i.\n",sym,s,sz);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
ec_probmod_clear(&mod);
|
||||
}
|
||||
ec_byte_writeclear(&buf);
|
||||
fprintf(stderr,"All tests passed.\n");
|
||||
return 0;
|
||||
}
|
44
libentcode/entcode.c
Normal file
44
libentcode/entcode.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
#include "entcode.h"
|
||||
|
||||
|
||||
|
||||
void ec_byte_reset(ec_byte_buffer *_b){
|
||||
_b->ptr=_b->buf;
|
||||
}
|
||||
|
||||
long ec_byte_bytes(ec_byte_buffer *_b){
|
||||
return _b->ptr-_b->buf;
|
||||
}
|
||||
|
||||
unsigned char *ec_byte_get_buffer(ec_byte_buffer *_b){
|
||||
return _b->buf;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int ec_ilog(ec_uint32 _v){
|
||||
#if defined(EC_CLZ)
|
||||
return EC_CLZ0-EC_CLZ(_v)&-!!_v;
|
||||
#else
|
||||
/*On a Pentium M, this branchless version tested as the fastest on
|
||||
1,000,000,000 random 32-bit integers, edging out a similar version with
|
||||
branches, and a 256-entry LUT version.*/
|
||||
int ret;
|
||||
int m;
|
||||
ret=!!_v;
|
||||
m=!!(_v&0xFFFF0000)<<4;
|
||||
_v>>=m;
|
||||
ret|=m;
|
||||
m=!!(_v&0xFF00)<<3;
|
||||
_v>>=m;
|
||||
ret|=m;
|
||||
m=!!(_v&0xF0)<<2;
|
||||
_v>>=m;
|
||||
ret|=m;
|
||||
m=!!(_v&0xC)<<1;
|
||||
_v>>=m;
|
||||
ret|=m;
|
||||
ret+=!!(_v&0x2);
|
||||
return ret;
|
||||
#endif
|
||||
}
|
49
libentcode/entcode.h
Normal file
49
libentcode/entcode.h
Normal file
|
@ -0,0 +1,49 @@
|
|||
#if !defined(_entcode_H)
|
||||
# define _entcode_H (1)
|
||||
# include <limits.h>
|
||||
# include "ecintrin.h"
|
||||
|
||||
|
||||
|
||||
typedef unsigned ec_uint32;
|
||||
typedef struct ec_byte_buffer ec_byte_buffer;
|
||||
|
||||
|
||||
|
||||
/*The number of bits to code at a time when coding bits directly.*/
|
||||
# define EC_UNIT_BITS (8)
|
||||
/*The mask for the given bits.*/
|
||||
# define EC_UNIT_MASK ((1U<<EC_UNIT_BITS)-1)
|
||||
|
||||
|
||||
|
||||
/*Simple libogg1-style buffer.*/
|
||||
struct ec_byte_buffer{
|
||||
unsigned char *buf;
|
||||
unsigned char *ptr;
|
||||
long storage;
|
||||
};
|
||||
|
||||
/*Encoding functions.*/
|
||||
void ec_byte_writeinit(ec_byte_buffer *_b);
|
||||
void ec_byte_writetrunc(ec_byte_buffer *_b,long _bytes);
|
||||
void ec_byte_write1(ec_byte_buffer *_b,unsigned _value);
|
||||
void ec_byte_write4(ec_byte_buffer *_b,ec_uint32 _value);
|
||||
void ec_byte_writecopy(ec_byte_buffer *_b,void *_source,long _bytes);
|
||||
void ec_byte_writeclear(ec_byte_buffer *_b);
|
||||
/*Decoding functions.*/
|
||||
void ec_byte_readinit(ec_byte_buffer *_b,unsigned char *_buf,long _bytes);
|
||||
int ec_byte_look1(ec_byte_buffer *_b);
|
||||
int ec_byte_look4(ec_byte_buffer *_b,ec_uint32 *_val);
|
||||
void ec_byte_adv1(ec_byte_buffer *_b);
|
||||
void ec_byte_adv4(ec_byte_buffer *_b);
|
||||
int ec_byte_read1(ec_byte_buffer *_b);
|
||||
int ec_byte_read4(ec_byte_buffer *_b,ec_uint32 *_val);
|
||||
/*Shared functions.*/
|
||||
void ec_byte_reset(ec_byte_buffer *_b);
|
||||
long ec_byte_bytes(ec_byte_buffer *_b);
|
||||
unsigned char *ec_byte_get_buffer(ec_byte_buffer *_b);
|
||||
|
||||
int ec_ilog(ec_uint32 _v);
|
||||
|
||||
#endif
|
123
libentcode/entdec.c
Normal file
123
libentcode/entdec.c
Normal file
|
@ -0,0 +1,123 @@
|
|||
#include <stddef.h>
|
||||
#include "entdec.h"
|
||||
|
||||
|
||||
|
||||
void ec_byte_readinit(ec_byte_buffer *_b,unsigned char *_buf,long _bytes){
|
||||
_b->buf=_b->ptr=_buf;
|
||||
_b->storage=_bytes;
|
||||
}
|
||||
|
||||
int ec_byte_look1(ec_byte_buffer *_b){
|
||||
ptrdiff_t endbyte;
|
||||
endbyte=_b->ptr-_b->buf;
|
||||
if(endbyte>=_b->storage)return -1;
|
||||
else return _b->ptr[0];
|
||||
}
|
||||
|
||||
int ec_byte_look4(ec_byte_buffer *_b,ec_uint32 *_val){
|
||||
ptrdiff_t endbyte;
|
||||
endbyte=_b->ptr-_b->buf;
|
||||
if(endbyte+4>_b->storage){
|
||||
if(endbyte<_b->storage){
|
||||
*_val=_b->ptr[0];
|
||||
endbyte++;
|
||||
if(endbyte<_b->storage){
|
||||
*_val|=(ec_uint32)_b->ptr[1]<<8;
|
||||
endbyte++;
|
||||
if(endbyte<_b->storage)*_val|=(ec_uint32)_b->ptr[2]<<16;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
else{
|
||||
*_val=_b->ptr[0];
|
||||
*_val|=(ec_uint32)_b->ptr[1]<<8;
|
||||
*_val|=(ec_uint32)_b->ptr[2]<<16;
|
||||
*_val|=(ec_uint32)_b->ptr[3]<<24;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ec_byte_adv1(ec_byte_buffer *_b){
|
||||
_b->ptr++;
|
||||
}
|
||||
|
||||
void ec_byte_adv4(ec_byte_buffer *_b){
|
||||
_b->ptr+=4;
|
||||
}
|
||||
|
||||
int ec_byte_read1(ec_byte_buffer *_b){
|
||||
ptrdiff_t endbyte;
|
||||
endbyte=_b->ptr-_b->buf;
|
||||
if(endbyte>=_b->storage)return -1;
|
||||
else return *(_b->ptr++);
|
||||
}
|
||||
|
||||
int ec_byte_read4(ec_byte_buffer *_b,ec_uint32 *_val){
|
||||
unsigned char *end;
|
||||
end=_b->buf+_b->storage;
|
||||
if(_b->ptr+4>end){
|
||||
if(_b->ptr<end){
|
||||
*_val=*(_b->ptr++);
|
||||
if(_b->ptr<end){
|
||||
*_val|=(ec_uint32)*(_b->ptr++)<<8;
|
||||
if(_b->ptr<end)*_val|=(ec_uint32)*(_b->ptr++)<<16;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
else{
|
||||
*_val=(*_b->ptr++);
|
||||
*_val|=(ec_uint32)*(_b->ptr++)<<8;
|
||||
*_val|=(ec_uint32)*(_b->ptr++)<<16;
|
||||
*_val|=(ec_uint32)*(_b->ptr++)<<24;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
ec_uint32 ec_dec_bits(ec_dec *_this,int _ftb){
|
||||
ec_uint32 t;
|
||||
unsigned s;
|
||||
unsigned ft;
|
||||
t=0;
|
||||
while(_ftb>EC_UNIT_BITS){
|
||||
s=ec_decode(_this,EC_UNIT_MASK+1);
|
||||
ec_dec_update(_this,s,s+1,EC_UNIT_MASK+1);
|
||||
t=t<<EC_UNIT_BITS|s;
|
||||
_ftb-=EC_UNIT_BITS;
|
||||
}
|
||||
ft=1U<<_ftb;
|
||||
s=ec_decode(_this,ft);
|
||||
ec_dec_update(_this,s,s+1,ft);
|
||||
t=t<<_ftb|s;
|
||||
return t;
|
||||
}
|
||||
|
||||
ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft){
|
||||
ec_uint32 mask;
|
||||
ec_uint32 ft;
|
||||
ec_uint32 t;
|
||||
unsigned s;
|
||||
int ftb;
|
||||
t=0;
|
||||
_ft--;
|
||||
ftb=EC_ILOG(_ft);
|
||||
while(ftb>EC_UNIT_BITS){
|
||||
ftb-=EC_UNIT_BITS;
|
||||
ft=(_ft>>ftb)+1;
|
||||
s=ec_decode(_this,ft);
|
||||
ec_dec_update(_this,s,s+1,ft);
|
||||
t=t<<EC_UNIT_BITS|s;
|
||||
if(s<ft-1)return t<<ftb|ec_dec_bits(_this,ftb);
|
||||
mask=((ec_uint32)1<<ftb)-1;
|
||||
_ft=_ft&mask;
|
||||
}
|
||||
_ft++;
|
||||
s=ec_decode(_this,_ft);
|
||||
ec_dec_update(_this,s,s+1,_ft);
|
||||
t=t<<ftb|s;
|
||||
return t;
|
||||
}
|
74
libentcode/entdec.h
Normal file
74
libentcode/entdec.h
Normal file
|
@ -0,0 +1,74 @@
|
|||
#if !defined(_entdec_H)
|
||||
# define _entdec_H (1)
|
||||
# include "entcode.h"
|
||||
|
||||
|
||||
|
||||
typedef struct ec_dec ec_dec;
|
||||
|
||||
|
||||
|
||||
/*The entropy decoder.*/
|
||||
struct ec_dec{
|
||||
/*The buffer to decode.*/
|
||||
ec_byte_buffer *buf;
|
||||
/*The remainder of a buffered input symbol.*/
|
||||
int rem;
|
||||
/*The number of values in the current range.*/
|
||||
ec_uint32 rng;
|
||||
/*The difference between the input value and the lowest value in the current
|
||||
range.*/
|
||||
ec_uint32 dif;
|
||||
/*Normalization factor.*/
|
||||
ec_uint32 nrm;
|
||||
};
|
||||
|
||||
|
||||
/*Initializes the decoder.
|
||||
_buf: The input buffer to use.
|
||||
Return: 0 on success, or a negative value on error.*/
|
||||
void ec_dec_init(ec_dec *_this,ec_byte_buffer *_buf);
|
||||
/*Calculates the cumulative frequency for the next symbol.
|
||||
This can then be fed into the probability model to determine what that
|
||||
symbol is, and the additional frequency information required to advance to
|
||||
the next symbol.
|
||||
This function cannot be called more than once without a corresponding call to
|
||||
ec_dec_update(), or decoding will not proceed correctly.
|
||||
_ft: The total frequency of the symbols in the alphabet the next symbol was
|
||||
encoded with.
|
||||
Return: A cumulative frequency representing the encoded symbol.
|
||||
If the cumulative frequency of all the symbols before the one that
|
||||
was encoded was fl, and the cumulative frequency of all the symbols
|
||||
up to and including the one encoded is fh, then the returned value
|
||||
will fall in the range [fl,fh).*/
|
||||
unsigned ec_decode(ec_dec *_this,unsigned _ft);
|
||||
/*Advance the decoder past the next symbol using the frequency information the
|
||||
symbol was encoded with.
|
||||
Exactly one call to ec_decode() must have been made so that all necessary
|
||||
intermediate calculations are performed.
|
||||
_fl: The cumulative frequency of all symbols that come before the symbol
|
||||
decoded.
|
||||
_fh: The cumulative frequency of all symbols up to and including the symbol
|
||||
Together with _fl, this defines the range [_fl,_fh) in which the value
|
||||
returned above must fall.
|
||||
_ft: The total frequency of the symbols in the alphabet the symbol decoded
|
||||
was encoded in.
|
||||
This must be the same as passed to the preceding call to ec_decode().*/
|
||||
void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,
|
||||
unsigned _ft);
|
||||
/*Extracts a sequence of raw bits from the stream.
|
||||
The bits must have been encoded with ec_enc_bits().
|
||||
No call to ec_dec_update() is necessary after this call.
|
||||
_ftb: The number of bits to extract.
|
||||
This must be at least one, and no more than 32.
|
||||
Return: The decoded bits.*/
|
||||
ec_uint32 ec_dec_bits(ec_dec *_this,int _ftb);
|
||||
/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
|
||||
The bits must have been encoded with ec_enc_uint().
|
||||
No call to ec_dec_update() is necessary after this call.
|
||||
_ft: The number of integers that can be decoded (one more than the max).
|
||||
This must be at least one, and no more than 2**32-1.
|
||||
Return: The decoded bits.*/
|
||||
ec_uint32 ec_dec_uint(ec_dec *_this,ec_uint32 _ft);
|
||||
|
||||
#endif
|
98
libentcode/entenc.c
Normal file
98
libentcode/entenc.c
Normal file
|
@ -0,0 +1,98 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "entenc.h"
|
||||
|
||||
|
||||
|
||||
#define EC_BUFFER_INCREMENT (256)
|
||||
|
||||
void ec_byte_writeinit(ec_byte_buffer *_b){
|
||||
_b->ptr=_b->buf=malloc(EC_BUFFER_INCREMENT);
|
||||
_b->storage=EC_BUFFER_INCREMENT;
|
||||
}
|
||||
|
||||
void ec_byte_writetrunc(ec_byte_buffer *_b,long _bytes){
|
||||
_b->ptr=_b->buf+_bytes;
|
||||
}
|
||||
|
||||
void ec_byte_write1(ec_byte_buffer *_b,unsigned _value){
|
||||
ptrdiff_t endbyte;
|
||||
endbyte=_b->ptr-_b->buf;
|
||||
if(endbyte>=_b->storage){
|
||||
_b->buf=realloc(_b->buf,_b->storage+EC_BUFFER_INCREMENT);
|
||||
_b->storage+=EC_BUFFER_INCREMENT;
|
||||
_b->ptr=_b->buf+endbyte;
|
||||
}
|
||||
*(_b->ptr++)=(unsigned char)_value;
|
||||
}
|
||||
|
||||
void ec_byte_write4(ec_byte_buffer *_b,ec_uint32 _value){
|
||||
ptrdiff_t endbyte;
|
||||
endbyte=_b->ptr-_b->buf;
|
||||
if(endbyte+4>_b->storage){
|
||||
_b->buf=realloc(_b->buf,_b->storage+EC_BUFFER_INCREMENT);
|
||||
_b->storage+=EC_BUFFER_INCREMENT;
|
||||
_b->ptr=_b->buf+endbyte;
|
||||
}
|
||||
*(_b->ptr++)=(unsigned char)_value;
|
||||
_value>>=8;
|
||||
*(_b->ptr++)=(unsigned char)_value;
|
||||
_value>>=8;
|
||||
*(_b->ptr++)=(unsigned char)_value;
|
||||
_value>>=8;
|
||||
*(_b->ptr++)=(unsigned char)_value;
|
||||
}
|
||||
|
||||
void ec_byte_writecopy(ec_byte_buffer *_b,void *_source,long _bytes){
|
||||
ptrdiff_t endbyte;
|
||||
endbyte=_b->ptr-_b->buf;
|
||||
if(endbyte+_bytes>_b->storage){
|
||||
_b->storage=endbyte+_bytes+EC_BUFFER_INCREMENT;
|
||||
_b->buf=realloc(_b->buf,_b->storage);
|
||||
_b->ptr=_b->buf+endbyte;
|
||||
}
|
||||
memmove(_b->ptr,_source,_bytes);
|
||||
_b->ptr+=_bytes;
|
||||
}
|
||||
|
||||
void ec_byte_writeclear(ec_byte_buffer *_b){
|
||||
free(_b->buf);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,int _ftb){
|
||||
unsigned fl;
|
||||
unsigned ft;
|
||||
while(_ftb>EC_UNIT_BITS){
|
||||
_ftb-=EC_UNIT_BITS;
|
||||
fl=(unsigned)(_fl>>_ftb)&EC_UNIT_MASK;
|
||||
ec_encode(_this,fl,fl+1,EC_UNIT_MASK+1);
|
||||
}
|
||||
ft=1<<_ftb;
|
||||
fl=_fl&ft-1;
|
||||
ec_encode(_this,fl,fl+1,ft);
|
||||
}
|
||||
|
||||
void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft){
|
||||
ec_uint32 mask;
|
||||
ec_uint32 ft;
|
||||
unsigned fl;
|
||||
int ftb;
|
||||
_ft--;
|
||||
ftb=EC_ILOG(_ft);
|
||||
while(ftb>EC_UNIT_BITS){
|
||||
ftb-=EC_UNIT_BITS;
|
||||
ft=(_ft>>ftb)+1;
|
||||
fl=(unsigned)(_fl>>ftb);
|
||||
ec_encode(_this,fl,fl+1,ft);
|
||||
if(fl<ft-1){
|
||||
ec_enc_bits(_this,_fl,ftb);
|
||||
return;
|
||||
}
|
||||
mask=((ec_uint32)1<<ftb)-1;
|
||||
_fl=_fl&mask;
|
||||
_ft=_ft&mask;
|
||||
}
|
||||
ec_encode(_this,_fl,_fl+1,_ft+1);
|
||||
}
|
57
libentcode/entenc.h
Normal file
57
libentcode/entenc.h
Normal file
|
@ -0,0 +1,57 @@
|
|||
#if !defined(_entenc_H)
|
||||
# define _entenc_H (1)
|
||||
# include <stddef.h>
|
||||
# include "entcode.h"
|
||||
|
||||
|
||||
|
||||
typedef struct ec_enc ec_enc;
|
||||
|
||||
|
||||
|
||||
/*The entropy encoder.*/
|
||||
struct ec_enc{
|
||||
/*Buffered output.*/
|
||||
ec_byte_buffer *buf;
|
||||
/*A buffered output symbol, awaiting carry propagation.*/
|
||||
int rem;
|
||||
/*Number of extra carry propogating symbols.*/
|
||||
size_t ext;
|
||||
/*The number of values in the current range.*/
|
||||
ec_uint32 rng;
|
||||
/*The low end of the current range (inclusive).*/
|
||||
ec_uint32 low;
|
||||
};
|
||||
|
||||
|
||||
/*Initializes the encoder.
|
||||
_buf: The buffer to store output bytes in.
|
||||
This must have already been initialized for writing and reset.*/
|
||||
void ec_enc_init(ec_enc *_this,ec_byte_buffer *_buf);
|
||||
/*Encodes a symbol given its frequency information.
|
||||
The frequency information must be discernable by the decoder, assuming it
|
||||
has read only the previous symbols from the stream.
|
||||
It is allowable to change the frequency information, or even the entire
|
||||
source alphabet, so long as the decoder can tell from the context of the
|
||||
previously encoded information that it is supposed to do so as well.
|
||||
_fl: The sum of the frequencies of symbols before the one to be encoded.
|
||||
_fs: The frequency of the symbol to be encoded.
|
||||
_ft: The sum of the frequencies of all the symbols*/
|
||||
void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fs,unsigned _ft);
|
||||
/*Encodes a sequence of raw bits in the stream.
|
||||
_fl: The bits to encode.
|
||||
_ftb: The number of bits to encode.
|
||||
This must be at least one, and no more than 32.*/
|
||||
void ec_enc_bits(ec_enc *_this,ec_uint32 _fl,int _ftb);
|
||||
/*Encodes a raw unsigned integer in the stream.
|
||||
_fl: The integer to encode.
|
||||
_ft: The number of integers that can be encoded (one more than the max).
|
||||
This must be at least one, and no more than 2**32-1.*/
|
||||
void ec_enc_uint(ec_enc *_this,ec_uint32 _fl,ec_uint32 _ft);
|
||||
|
||||
/*Indicates that there are no more symbols to encode.
|
||||
All reamining output bytes are flushed to the output buffer.
|
||||
ec_enc_init() must be called before the encoder can be used again.*/
|
||||
void ec_enc_done(ec_enc *_this);
|
||||
|
||||
#endif
|
36
libentcode/mfrngcod.h
Normal file
36
libentcode/mfrngcod.h
Normal file
|
@ -0,0 +1,36 @@
|
|||
#if !defined(_mfrngcode_H)
|
||||
# define _mfrngcode_H (1)
|
||||
# include "entcode.h"
|
||||
|
||||
/*Constants used by the entropy encoder/decoder.*/
|
||||
|
||||
/*The number of bits to output at a time.*/
|
||||
# define EC_SYM_BITS (8)
|
||||
/*The total number of bits in each of the state registers.*/
|
||||
# define EC_CODE_BITS (32)
|
||||
/*The maximum symbol value.*/
|
||||
# define EC_SYM_MAX ((1U<<EC_SYM_BITS)-1)
|
||||
/*Bits to shift by to move a symbol into the high-order position.*/
|
||||
# define EC_CODE_SHIFT (EC_CODE_BITS-EC_SYM_BITS-1)
|
||||
/*Carry bit of the high-order range symbol.*/
|
||||
# define EC_CODE_TOP (1U<<EC_CODE_BITS-1)
|
||||
/*Low-order bit of the high-order range symbol.*/
|
||||
# define EC_CODE_BOT (EC_CODE_TOP>>EC_SYM_BITS)
|
||||
/*Code for which propogating carries are possible.*/
|
||||
# define EC_CODE_CARRY (EC_SYM_MAX<<EC_CODE_SHIFT)
|
||||
/*The number of bits available for the last, partial symbol in the code field.*/
|
||||
# define EC_CODE_EXTRA ((EC_CODE_BITS-2)%EC_SYM_BITS+1)
|
||||
/*A mask for the bits available in the coding buffer.
|
||||
This allows different platforms to use a variable with more bits, if it is
|
||||
convenient.
|
||||
We will only use EC_CODE_BITS of it.*/
|
||||
# define EC_CODE_MASK ((1U<<EC_CODE_BITS-1)-1<<1|1)
|
||||
|
||||
|
||||
/*The non-zero symbol of the second possible reserved ending.
|
||||
This must be the high-bit.*/
|
||||
# define EC_FOF_RSV1 (1<<EC_SYM_BITS-1)
|
||||
/*A mask for all the other bits.*/
|
||||
# define EC_FOF_RSV1_MASK (EC_FOF_RSV1-1)
|
||||
|
||||
#endif
|
260
libentcode/mfrngdec.c
Normal file
260
libentcode/mfrngdec.c
Normal file
|
@ -0,0 +1,260 @@
|
|||
#include <stddef.h>
|
||||
#include "entdec.h"
|
||||
#include "mfrngcod.h"
|
||||
|
||||
|
||||
|
||||
/*A multiply-free range decoder.
|
||||
This is an entropy decoder based upon \cite{Mar79}, which is itself a
|
||||
rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
|
||||
It is very similar to arithmetic encoding, except that encoding is done with
|
||||
digits in any base, instead of with bits, and so it is faster when using
|
||||
larger bases (i.e.: a byte).
|
||||
The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
|
||||
is the base, longer than the theoretical optimum, but to my knowledge there
|
||||
is no published justification for this claim.
|
||||
This only seems true when using near-infinite precision arithmetic so that
|
||||
the process is carried out with no rounding errors.
|
||||
|
||||
IBM (the author's employer) never sought to patent the idea, and to my
|
||||
knowledge the algorithm is unencumbered by any patents, though its
|
||||
performance is very competitive with proprietary arithmetic coding.
|
||||
The two are based on very similar ideas, however.
|
||||
An excellent description of implementation details is available at
|
||||
http://www.arturocampos.com/ac_range.html
|
||||
A recent work \cite{MNW98} which proposes several changes to arithmetic
|
||||
encoding for efficiency actually re-discovers many of the principles
|
||||
behind range encoding, and presents a good theoretical analysis of them.
|
||||
|
||||
The coder is made multiply-free by replacing the standard multiply/divide
|
||||
used to partition the current interval according to the total frequency
|
||||
count.
|
||||
The new partition function scales the count so that it differs from the size
|
||||
of the interval by no more than a factor of two and then assigns each symbol
|
||||
one or two code words in the interval.
|
||||
For details see \cite{SM98}.
|
||||
|
||||
This coder also handles the end of the stream in a slightly more graceful
|
||||
fashion than most arithmetic or range coders.
|
||||
Once the final symbol has been encoded, the coder selects the code word with
|
||||
the shortest number of bits that still falls within the final interval.
|
||||
This method is not novel.
|
||||
Here, by the length of the code word, we refer to the number of bits until
|
||||
its final 1.
|
||||
Any trailing zeros may be discarded, since the encoder, once it runs out of
|
||||
input, will pad its buffer with zeros.
|
||||
|
||||
But this means that no encoded stream would ever have any zero bytes at the
|
||||
end.
|
||||
Since there are some coded representations we cannot produce, it implies that
|
||||
there is still some redundancy in the stream.
|
||||
In this case, we can pick a special byte value, RSV1, and should the stream
|
||||
end in a sequence of zeros, followed by the RSV1 byte, we can code the
|
||||
zeros, and discard the RSV1 byte.
|
||||
The decoder, knowing that the encoder would never produce a sequence of zeros
|
||||
at the end, would then know to add in the RSV1 byte if it observed it.
|
||||
|
||||
Now, the encoder would never produce a stream that ended in a sequence of
|
||||
zeros followed by a RSV1 byte.
|
||||
So, if the stream ends in a non-empty sequence of zeros, followed by any
|
||||
positive number of RSV1 bytes, the last RSV1 byte is discarded.
|
||||
The decoder, if it encounters a stream that ends in non-empty sequence of
|
||||
zeros followed by any non-negative number of RSV1 bytes, adds an additional
|
||||
RSV1 byte to the stream.
|
||||
With this strategy, every possible sequence of input bytes is transformed to
|
||||
one that could actually be produced by the encoder.
|
||||
|
||||
The only question is what non-zero value to use for RSV1.
|
||||
We select 0x80, since it has the nice property of producing the shortest
|
||||
possible byte streams when using our strategy for selecting a number within
|
||||
the final interval to encode.
|
||||
Clearly if the shortest possible code word that falls within the interval has
|
||||
its last one bit as the most significant bit of the final byte, and the
|
||||
previous bytes were a non-empty sequence of zeros followed by a non-negative
|
||||
number of 0x80 bytes, then the last byte would be discarded.
|
||||
If the shortest code word is not so formed, then no other code word in the
|
||||
interval would result in any more bytes being discarded.
|
||||
Any longer code word would have an additional one bit somewhere, and so would
|
||||
require at a minimum that that byte would be coded.
|
||||
If the shortest code word has a 1 before the final one that is preventing the
|
||||
stream from ending in a non-empty sequence of zeros followed by a
|
||||
non-negative number of 0x80's, then there is no code word of the same length
|
||||
which contains that bit as a zero.
|
||||
If there were, then we could simply leave that bit a 1, and drop all the bits
|
||||
after it without leaving the interval, thus producing a shorter code word.
|
||||
|
||||
In this case, RSV1 can only drop 1 bit off the final stream.
|
||||
Other choices could lead to savings of up to 8 bits for particular streams,
|
||||
but this would produce the odd situation that a stream with more non-zero
|
||||
bits is actually encoded in fewer bytes.
|
||||
|
||||
@PHDTHESIS{Pas76,
|
||||
author="Richard Clark Pasco",
|
||||
title="Sorce coding algorithms for fast data compression",
|
||||
school="Dept. of Electrical Engineering, Stanford University",
|
||||
address="Stanford, CA",
|
||||
month=May,
|
||||
year=1976
|
||||
}
|
||||
@INPROCEEDINGS{Mar79,
|
||||
author="Martin, G.N.N.",
|
||||
title="Range encoding: an algorithm for removing redundancy from a digitised
|
||||
message",
|
||||
booktitle="Video & Data Recording Conference",
|
||||
year=1979,
|
||||
address="Southampton",
|
||||
month=Jul
|
||||
}
|
||||
@ARTICLE{MNW98,
|
||||
author="Alistair Moffat and Radford Neal and Ian H. Witten",
|
||||
title="Arithmetic Coding Revisited",
|
||||
journal="{ACM} Transactions on Information Systems",
|
||||
year=1998,
|
||||
volume=16,
|
||||
number=3,
|
||||
pages="256--294",
|
||||
month=Jul,
|
||||
URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
|
||||
}
|
||||
@INPROCEEDINGS{SM98,
|
||||
author="Lang Stuiver and Alistair Moffat",
|
||||
title="Piecewise Integer Mapping for Arithmetic Coding",
|
||||
booktitle="Proceedings of the {IEEE} Data Compression Conference",
|
||||
pages="1--10",
|
||||
address="Snowbird, UT",
|
||||
month="Mar./Apr.",
|
||||
year=1998
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
/*Gets the next byte of input.
|
||||
After all the bytes in the current packet have been consumed, and the extra
|
||||
end code returned if needed, this function will continue to return zero each
|
||||
time it is called.
|
||||
Return: The next byte of input.*/
|
||||
static int ec_dec_in(ec_dec *_this){
|
||||
int ret;
|
||||
ret=ec_byte_read1(_this->buf);
|
||||
if(ret<0){
|
||||
unsigned char *buf;
|
||||
long bytes;
|
||||
bytes=ec_byte_bytes(_this->buf);
|
||||
buf=ec_byte_get_buffer(_this->buf);
|
||||
/*Breaking abstraction: don't do this at home, kids.*/
|
||||
if(_this->buf->storage==bytes){
|
||||
ec_byte_adv1(_this->buf);
|
||||
if(bytes>0){
|
||||
unsigned char *p;
|
||||
p=buf+bytes;
|
||||
/*If we end in a string of 0 or more EC_FOF_RSV1 bytes preceded by a
|
||||
zero, return an extra EC_FOF_RSV1 byte.*/
|
||||
do p--;
|
||||
while(p>buf&&p[0]==EC_FOF_RSV1);
|
||||
if(!p[0])return EC_FOF_RSV1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
else return ret;
|
||||
}
|
||||
|
||||
/*Normalizes the contents of low and rng so that rng is contained in the
|
||||
high-order symbol of low.*/
|
||||
static void ec_dec_normalize(ec_dec *_this){
|
||||
/*If the range is too small, rescale it and input some bits.*/
|
||||
while(_this->rng<=EC_CODE_BOT){
|
||||
int sym;
|
||||
_this->rng<<=EC_SYM_BITS;
|
||||
/*Use up the remaining bits from our last symbol.*/
|
||||
sym=_this->rem<<EC_CODE_EXTRA&EC_SYM_MAX;
|
||||
/*Read the next value from the input.*/
|
||||
_this->rem=ec_dec_in(_this);
|
||||
/*Take the rest of the bits we need from this new symbol.*/
|
||||
sym|=_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA;
|
||||
_this->dif=(_this->dif<<EC_SYM_BITS)+sym&EC_CODE_MASK;
|
||||
/*dif can never be larger than EC_CODE_TOP.
|
||||
This is equivalent to the slightly more readable:
|
||||
if(_this->dif>EC_CODE_TOP)_this->dif-=EC_CODE_TOP;*/
|
||||
_this->dif^=(_this->dif&_this->dif-1)&EC_CODE_TOP;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_dec_init(ec_dec *_this,ec_byte_buffer *_buf){
|
||||
_this->buf=_buf;
|
||||
_this->rem=ec_dec_in(_this);
|
||||
_this->rng=1U<<EC_CODE_EXTRA;
|
||||
_this->dif=_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA;
|
||||
/*Normalize the interval.*/
|
||||
ec_dec_normalize(_this);
|
||||
}
|
||||
|
||||
unsigned ec_decode(ec_dec *_this,unsigned _ft){
|
||||
unsigned d;
|
||||
/*Step 1: Compute the normalization factor for the frequency counts.*/
|
||||
_this->nrm=EC_ILOG(_this->rng)-EC_ILOG(_ft);
|
||||
_ft<<=_this->nrm;
|
||||
d=_ft>_this->rng;
|
||||
_ft>>=d;
|
||||
_this->nrm-=d;
|
||||
/*Step 2: invert the partition function.*/
|
||||
d=_this->rng-_ft;
|
||||
return EC_MAXI((int)(_this->dif>>1),(int)(_this->dif-d))>>_this->nrm;
|
||||
/*Step 3: The caller locates the range [fl,fh) containing the return value
|
||||
and calls ec_dec_update().*/
|
||||
}
|
||||
|
||||
void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){
|
||||
unsigned r;
|
||||
unsigned s;
|
||||
unsigned d;
|
||||
/*Step 4: Evaluate the two partition function values.*/
|
||||
_fl<<=_this->nrm;
|
||||
_fh<<=_this->nrm;
|
||||
_ft<<=_this->nrm;
|
||||
d=_this->rng-_ft;
|
||||
r=_fh+EC_MINI(_fh,d);
|
||||
s=_fl+EC_MINI(_fl,d);
|
||||
/*Step 5: Update the interval.*/
|
||||
_this->rng=r-s;
|
||||
_this->dif-=s;
|
||||
/*Step 6: Normalize the interval.*/
|
||||
ec_dec_normalize(_this);
|
||||
}
|
||||
|
||||
#if 0
|
||||
int ec_dec_done(ec_dec *_this){
|
||||
unsigned low;
|
||||
int ret;
|
||||
/*Check to make sure we've used all the input bytes.
|
||||
This ensures that no more ones would ever be inserted into the decoder.*/
|
||||
if(_this->buf->ptr-ec_byte_get_buffer(_this->buf)<=
|
||||
ec_byte_bytes(_this->buf)){
|
||||
return 0;
|
||||
}
|
||||
/*We compute the smallest finitely odd fraction that fits inside the current
|
||||
range, and write that to the stream.
|
||||
This is guaranteed to yield the smallest possible encoding.*/
|
||||
/*TODO: Fix this line, as it is wrong.
|
||||
It doesn't seem worth being able to make this check to do an extra
|
||||
subtraction for every symbol decoded.*/
|
||||
low=/*What we want: _this->top-_this->rng; What we have:*/_this->dif
|
||||
if(low){
|
||||
unsigned end;
|
||||
end=EC_CODE_TOP;
|
||||
/*Ensure that the next free end is in the range.*/
|
||||
if(end-low>=_this->rng){
|
||||
unsigned msk;
|
||||
msk=EC_CODE_TOP-1;
|
||||
do{
|
||||
msk>>=1;
|
||||
end=(low+msk)&~msk|msk+1;
|
||||
}
|
||||
while(end-low>=_this->rng);
|
||||
}
|
||||
/*The remaining input should have been the next free end.*/
|
||||
return end-low!=_this->dif;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif
|
164
libentcode/mfrngenc.c
Normal file
164
libentcode/mfrngenc.c
Normal file
|
@ -0,0 +1,164 @@
|
|||
#include <stddef.h>
|
||||
#include "entenc.h"
|
||||
#include "mfrngcod.h"
|
||||
|
||||
|
||||
|
||||
/*A multiply-free range encoder.
|
||||
See mfrngdec.c and the references for implementation details
|
||||
\cite{Mar79,MNW98,SM98}.
|
||||
|
||||
@INPROCEEDINGS{Mar79,
|
||||
author="Martin, G.N.N.",
|
||||
title="Range encoding: an algorithm for removing redundancy from a digitised
|
||||
message",
|
||||
booktitle="Video \& Data Recording Conference",
|
||||
year=1979,
|
||||
address="Southampton",
|
||||
month=Jul
|
||||
}
|
||||
@ARTICLE{MNW98,
|
||||
author="Alistair Moffat and Radford Neal and Ian H. Witten",
|
||||
title="Arithmetic Coding Revisited",
|
||||
journal="{ACM} Transactions on Information Systems",
|
||||
year=1998,
|
||||
volume=16,
|
||||
number=3,
|
||||
pages="256--294",
|
||||
month=Jul,
|
||||
URL="http://dev.acm.org/pubs/citations/journals/tois/1998-16-3/p256-moffat/"
|
||||
}
|
||||
@INPROCEEDINGS{SM98,
|
||||
author="Lang Stuiver and Alistair Moffat",
|
||||
title="Piecewise Integer Mapping for Arithmetic Coding",
|
||||
booktitle="Proceedings of the {IEEE} Data Compression Conference",
|
||||
pages="1--10",
|
||||
address="Snowbird, UT",
|
||||
month="Mar./Apr.",
|
||||
year=1998
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
/*Outputs a symbol, with a carry bit.
|
||||
If there is a potential to propogate a carry over several symbols, they are
|
||||
buffered until it can be determined whether or not an actual carry will
|
||||
occur.
|
||||
If the counter for the buffered symbols overflows, then the range is
|
||||
truncated to force a carry to occur, towards whichever side maximizes the
|
||||
remaining range.*/
|
||||
static void ec_enc_carry_out(ec_enc *_this,int _c){
|
||||
if(_c!=EC_SYM_MAX){
|
||||
/*No further carry propogation possible, flush buffer.*/
|
||||
int carry;
|
||||
carry=_c>>EC_SYM_BITS;
|
||||
/*Don't output a byte on the first write.
|
||||
This compare should be taken care of by branch-prediction thereafter.*/
|
||||
if(_this->rem>=0)ec_byte_write1(_this->buf,_this->rem+carry);
|
||||
if(_this->ext>0){
|
||||
unsigned sym;
|
||||
sym=EC_SYM_MAX+carry&EC_SYM_MAX;
|
||||
do ec_byte_write1(_this->buf,sym);
|
||||
while(--(_this->ext)>0);
|
||||
}
|
||||
_this->rem=_c&EC_SYM_MAX;
|
||||
}
|
||||
else _this->ext++;
|
||||
}
|
||||
|
||||
static void ec_enc_normalize(ec_enc *_this){
|
||||
/*If the range is too small, output some bits and rescale it.*/
|
||||
while(_this->rng<=EC_CODE_BOT){
|
||||
ec_enc_carry_out(_this,(int)(_this->low>>EC_CODE_SHIFT));
|
||||
/*Move the next-to-high-order symbol into the high-order position.*/
|
||||
_this->low=_this->low<<EC_SYM_BITS&EC_CODE_TOP-1;
|
||||
_this->rng<<=EC_SYM_BITS;
|
||||
}
|
||||
}
|
||||
|
||||
void ec_enc_init(ec_enc *_this,ec_byte_buffer *_buf){
|
||||
_this->buf=_buf;
|
||||
_this->rem=-1;
|
||||
_this->ext=0;
|
||||
_this->low=0;
|
||||
_this->rng=EC_CODE_TOP;
|
||||
}
|
||||
|
||||
void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){
|
||||
unsigned r;
|
||||
unsigned s;
|
||||
unsigned d;
|
||||
int nrm;
|
||||
/*Step 1: we want ft in the range of [rng/2,rng).
|
||||
The high-order bits of the rng and ft are computed via a logarithm.
|
||||
This could also be done on some architectures with some custom assembly,
|
||||
which would provide even more speed.*/
|
||||
nrm=EC_ILOG(_this->rng)-EC_ILOG(_ft);
|
||||
/*Having the same high order bit may be too much.
|
||||
We may need to shift one less to ensure that ft is actually in the proper
|
||||
range.*/
|
||||
_ft<<=nrm;
|
||||
d=_ft>_this->rng;
|
||||
_ft>>=d;
|
||||
nrm-=d;
|
||||
/*We then scale everything by the computed power of 2.*/
|
||||
_fl<<=nrm;
|
||||
_fh<<=nrm;
|
||||
/*Step 2: compute the two values of the partition function.
|
||||
d is the splitting point of the interval [0,ft).*/
|
||||
d=_this->rng-_ft;
|
||||
r=_fh+EC_MINI(_fh,d);
|
||||
s=_fl+EC_MINI(_fl,d);
|
||||
/*Step 3: Update the end-point and range of the interval.*/
|
||||
_this->low+=s;
|
||||
_this->rng=r-s;
|
||||
/*Step 4: Normalize the interval.*/
|
||||
ec_enc_normalize(_this);
|
||||
}
|
||||
|
||||
void ec_enc_done(ec_enc *_this){
|
||||
/*We compute the integer in the current interval that has the largest number
|
||||
of trailing zeros, and write that to the stream.
|
||||
This is guaranteed to yield the smallest possible encoding.*/
|
||||
if(_this->low){
|
||||
unsigned end;
|
||||
end=EC_CODE_TOP;
|
||||
/*Ensure that the end value is in the range.*/
|
||||
if(end-_this->low>=_this->rng){
|
||||
unsigned msk;
|
||||
msk=EC_CODE_TOP-1;
|
||||
do{
|
||||
msk>>=1;
|
||||
end=(_this->low+msk)&~msk|msk+1;
|
||||
}
|
||||
while(end-_this->low>=_this->rng);
|
||||
}
|
||||
/*The remaining output is the next free end.*/
|
||||
while(end){
|
||||
ec_enc_carry_out(_this,end>>EC_CODE_SHIFT);
|
||||
end=end<<EC_SYM_BITS&EC_CODE_TOP-1;
|
||||
}
|
||||
}
|
||||
/*If we have a buffered byte...*/
|
||||
if(_this->rem>=0){
|
||||
unsigned char *p;
|
||||
unsigned char *buf;
|
||||
/*Flush it into the output buffer.*/
|
||||
ec_enc_carry_out(_this,0);
|
||||
/*We may be able to drop some redundant bytes from the end.*/
|
||||
buf=ec_byte_get_buffer(_this->buf);
|
||||
p=buf+ec_byte_bytes(_this->buf)-1;
|
||||
/*Strip trailing zeros.*/
|
||||
while(p>=buf&&!p[0])p--;
|
||||
/*Strip one trailing EC_FOF_RSV1 byte if the buffer ends in a string of
|
||||
consecutive EC_FOF_RSV1 bytes preceded by one (or more) zeros.*/
|
||||
if(p>buf&&p[0]==EC_FOF_RSV1){
|
||||
unsigned char *q;
|
||||
q=p;
|
||||
do q--;
|
||||
while(q>buf&&q[0]==EC_FOF_RSV1);
|
||||
if(!q[0])p--;
|
||||
}
|
||||
ec_byte_writetrunc(_this->buf,p+1-buf);
|
||||
}
|
||||
}
|
59
libentcode/probdec.c
Normal file
59
libentcode/probdec.c
Normal file
|
@ -0,0 +1,59 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "probdec.h"
|
||||
#include "bitrdec.h"
|
||||
|
||||
|
||||
|
||||
/*Gets the cumulative frequency count between _lo and _hi, as well as the
|
||||
cumulative frequency count below _lo.*/
|
||||
static unsigned ec_probmod_get_total(ec_probmod *_this,unsigned *_fl,
|
||||
unsigned _lo,unsigned _hi){
|
||||
*_fl=ec_bitree_get_cumul(_this->bitree,_lo);
|
||||
return ec_bitree_get_cumul(_this->bitree,_hi)-*_fl;
|
||||
}
|
||||
|
||||
static int ec_probmod_find_and_update(ec_probmod *_this,ec_probsamp *_samp,
|
||||
unsigned _freq){
|
||||
int sym;
|
||||
sym=ec_bitree_find_and_update(_this->bitree,_this->sz,_this->split,
|
||||
_freq,&_samp->fl,_this->inc);
|
||||
_samp->fs=ec_bitree_get_freq(_this->bitree,sym)-_this->inc;
|
||||
_this->ft+=_this->inc;
|
||||
if(_this->ft>_this->thresh){
|
||||
ec_bitree_halve(_this->bitree,_this->sz,_this->split);
|
||||
_this->ft=ec_bitree_get_cumul(_this->bitree,_this->sz);
|
||||
}
|
||||
return sym;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int ec_probmod_read(ec_probmod *_this,ec_dec *_dec){
|
||||
ec_probsamp samp;
|
||||
unsigned freq;
|
||||
int sym;
|
||||
samp.ft=_this->ft;
|
||||
freq=ec_decode(_dec,samp.ft);
|
||||
sym=ec_probmod_find_and_update(_this,&samp,freq);
|
||||
ec_dec_update(_dec,samp.fl,samp.fl+samp.fs,samp.ft);
|
||||
return sym;
|
||||
}
|
||||
|
||||
int ec_probmod_read_range(ec_probmod *_this,ec_dec *_dec,int _lo,int _hi){
|
||||
ec_probsamp samp;
|
||||
unsigned freq;
|
||||
unsigned base;
|
||||
int sz;
|
||||
int sym;
|
||||
sz=_this->sz;
|
||||
_lo=EC_MINI(_lo,sz);
|
||||
_hi=EC_MINI(_hi,sz);
|
||||
if(_hi<=_lo)return -1;
|
||||
samp.ft=ec_probmod_get_total(_this,&base,_lo,_hi);
|
||||
freq=ec_decode(_dec,samp.ft);
|
||||
sym=ec_probmod_find_and_update(_this,&samp,freq+base);
|
||||
samp.fl-=base;
|
||||
ec_dec_update(_dec,samp.fl,samp.fl+samp.fs,samp.ft);
|
||||
return sym;
|
||||
}
|
23
libentcode/probdec.h
Normal file
23
libentcode/probdec.h
Normal file
|
@ -0,0 +1,23 @@
|
|||
#if !defined(_probdec_H)
|
||||
# define _probdec_H (1)
|
||||
# include "probmod.h"
|
||||
# include "entdec.h"
|
||||
|
||||
|
||||
|
||||
/*Decodes a single symbol using the given probability model and entropy
|
||||
decoder.
|
||||
Return: The decoded symbol.*/
|
||||
int ec_probmod_read(ec_probmod *_this,ec_dec *_dec);
|
||||
/*Decodes a single symbol using the given probability model and entropy
|
||||
decoder, restricted to a given subrange of the available symbols.
|
||||
This effectively sets the frequency counts of all the symbols outside this
|
||||
range to zero, decodes the symbol, then restores the counts to their
|
||||
original values, and updates the model.
|
||||
_lo: The first legal symbol to decode.
|
||||
_hi: One greater than the last legal symbol to decode.
|
||||
This must be greater than _lo.
|
||||
Return: The decoded symbol.*/
|
||||
int ec_probmod_read_range(ec_probmod *_this,ec_dec *_dec,int _lo,int _hi);
|
||||
|
||||
#endif
|
50
libentcode/probenc.c
Normal file
50
libentcode/probenc.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
#include <string.h>
|
||||
#include "probenc.h"
|
||||
#include "bitrenc.h"
|
||||
|
||||
|
||||
|
||||
static void ec_probmod_samp_and_update(ec_probmod *_this,ec_probsamp *_samp,
|
||||
unsigned _sym){
|
||||
unsigned sz;
|
||||
sz=_this->sz;
|
||||
_samp->fs=ec_bitree_get_freq(_this->bitree,_sym);
|
||||
_samp->fl=ec_bitree_get_cumul(_this->bitree,_sym);
|
||||
_samp->ft=_this->ft;
|
||||
ec_bitree_update(_this->bitree,sz,_sym,_this->inc);
|
||||
_this->ft+=_this->inc;
|
||||
if(_this->ft>_this->thresh){
|
||||
ec_bitree_halve(_this->bitree,sz,_this->split);
|
||||
_this->ft=ec_bitree_get_cumul(_this->bitree,sz);
|
||||
}
|
||||
}
|
||||
|
||||
static void ec_probmod_samp_and_update_range(ec_probmod *_this,
|
||||
ec_probsamp *_samp,int _sym,int _lo,int _hi){
|
||||
unsigned base;
|
||||
int sz;
|
||||
sz=_this->sz;
|
||||
base=ec_bitree_get_cumul(_this->bitree,_lo);
|
||||
_samp->fs=ec_bitree_get_freq(_this->bitree,_sym);
|
||||
_samp->fl=ec_bitree_get_cumul(_this->bitree,_sym)-base;
|
||||
_samp->ft=ec_bitree_get_cumul(_this->bitree,_hi)-base;
|
||||
ec_bitree_update(_this->bitree,sz,_sym,_this->inc);
|
||||
_this->ft+=_this->inc;
|
||||
if(_this->ft>_this->thresh){
|
||||
ec_bitree_halve(_this->bitree,sz,_this->split);
|
||||
_this->ft=ec_bitree_get_cumul(_this->bitree,sz);
|
||||
}
|
||||
}
|
||||
|
||||
void ec_probmod_write(ec_probmod *_this,ec_enc *_enc,int _sym){
|
||||
ec_probsamp samp;
|
||||
ec_probmod_samp_and_update(_this,&samp,_sym);
|
||||
ec_encode(_enc,samp.fl,samp.fl+samp.fs,samp.ft);
|
||||
}
|
||||
|
||||
void ec_probmod_write_range(ec_probmod *_this,ec_enc *_enc,int _sym,
|
||||
int _lo,int _hi){
|
||||
ec_probsamp samp;
|
||||
ec_probmod_samp_and_update_range(_this,&samp,_sym,_lo,_hi);
|
||||
ec_encode(_enc,samp.fl,samp.fl+samp.fs,samp.ft);
|
||||
}
|
23
libentcode/probenc.h
Normal file
23
libentcode/probenc.h
Normal file
|
@ -0,0 +1,23 @@
|
|||
#if !defined(_probenc_H)
|
||||
# define _probenc_H (1)
|
||||
# include "probmod.h"
|
||||
# include "entenc.h"
|
||||
|
||||
/*Encodes a single symbol using the given probability model and entropy
|
||||
encoder.
|
||||
_sym: The symbol to encode.*/
|
||||
void ec_probmod_write(ec_probmod *_this,ec_enc *_enc,int _sym);
|
||||
/*Encodes a single symbol using the given probability model and entropy
|
||||
encoder, restricted to a given subrange of the available symbols.
|
||||
This effectively sets the frequency counts of all the symbols outside this
|
||||
range to zero, encodes the symbol, then restores the counts to their
|
||||
original values, and updates the models.
|
||||
_sym: The symbol to encode.
|
||||
The caller must ensure this falls in the range _lo<=_sym<_hi.
|
||||
_lo: The first legal symbol to encode.
|
||||
_hi: One greater than the last legal symbol to encode.
|
||||
This must be greater than _lo.*/
|
||||
void ec_probmod_write_range(ec_probmod *_this,ec_enc *_enc,int _sym,
|
||||
int _lo,int _hi);
|
||||
|
||||
#endif
|
32
libentcode/probmod.c
Normal file
32
libentcode/probmod.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "probmod.h"
|
||||
#include "bitree.h"
|
||||
|
||||
void ec_probmod_init(ec_probmod *_this,unsigned _sz){
|
||||
ec_probmod_init_full(_this,_sz,1U,1U<<23,NULL);
|
||||
}
|
||||
|
||||
void ec_probmod_init_from_counts(ec_probmod *_this,unsigned _sz,
|
||||
const unsigned *_counts){
|
||||
ec_probmod_init_full(_this,_sz,1U,1U<<23,_counts);
|
||||
}
|
||||
|
||||
void ec_probmod_init_full(ec_probmod *_this,unsigned _sz,unsigned _inc,
|
||||
unsigned _thresh,const unsigned *_counts){
|
||||
unsigned s;
|
||||
_this->sz=_sz;
|
||||
for(s=1;s<=_this->sz;s<<=1);
|
||||
_this->split=s>>1;
|
||||
_this->inc=_inc;
|
||||
_this->thresh=_thresh;
|
||||
_this->bitree=(unsigned *)malloc(_sz*sizeof(*_this->bitree));
|
||||
if(_counts!=NULL)memcpy(_this->bitree,_counts,_sz*sizeof(*_this->bitree));
|
||||
else for(s=0;s<_this->sz;s++)_this->bitree[s]=1;
|
||||
ec_bitree_from_counts(_this->bitree,_sz);
|
||||
_this->ft=ec_bitree_get_cumul(_this->bitree,_sz);
|
||||
}
|
||||
|
||||
void ec_probmod_clear(ec_probmod *_this){
|
||||
free(_this->bitree);
|
||||
}
|
73
libentcode/probmod.h
Normal file
73
libentcode/probmod.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
#if !defined(_probmod_H)
|
||||
# define _probmod_H (1)
|
||||
# include "entcode.h"
|
||||
|
||||
typedef struct ec_probsamp ec_probsamp;
|
||||
typedef struct ec_probmod ec_probmod;
|
||||
|
||||
/*A sample from a probability distribution.
|
||||
This is the information needed to encode a symbol or update the decoder
|
||||
state.*/
|
||||
struct ec_probsamp{
|
||||
/*The cumulative frequency of all symbols preceding this one in the
|
||||
alphabet.*/
|
||||
unsigned fl;
|
||||
/*The frequency of the symbol coded.*/
|
||||
unsigned fs;
|
||||
/*The total frequency of all symbols in the alphabet.*/
|
||||
unsigned ft;
|
||||
};
|
||||
|
||||
|
||||
/*A simple frequency-count probability model.*/
|
||||
struct ec_probmod{
|
||||
/*The number of symbols in this context.*/
|
||||
int sz;
|
||||
/*The largest power of two less than or equal to sz.*/
|
||||
int split;
|
||||
/*The amount by which to increment the frequency count of an observed
|
||||
symbol.*/
|
||||
unsigned inc;
|
||||
/*The current total frequency count.*/
|
||||
unsigned ft;
|
||||
/*The maximum total frequency count allowed before the counts are rescaled.
|
||||
Note that this should be larger than (inc+1>>1)+sz-1, since at most one
|
||||
rescaling is done per decoded symbol.
|
||||
Otherwise, this threshold might be exceeded.
|
||||
This must be less than 2**23 for a range coder, and 2**31 for an
|
||||
arithmetic coder.*/
|
||||
unsigned thresh;
|
||||
/*The binary indexed tree used to keep track of the frequency counts.*/
|
||||
unsigned *bitree;
|
||||
};
|
||||
|
||||
|
||||
/*Initializes a probability model of the given size.
|
||||
The amount to increment and all frequency counts are initialized to 1.
|
||||
The rescaling threshold is initialized to 2**23.
|
||||
_sz: The number of symbols in this context.*/
|
||||
void ec_probmod_init(ec_probmod *_this,unsigned _sz);
|
||||
/*Initializes a probability model of the given size.
|
||||
The amount to increment is initialized to 1.
|
||||
The rescaling threshold is initialized to 2**23.
|
||||
_sz: The number of symbols in this context.
|
||||
_counts: The initial frequency count of each symbol.*/
|
||||
void ec_probmod_init_from_counts(ec_probmod *_this,unsigned _sz,
|
||||
const unsigned *_counts);
|
||||
/*Initializes a probability model of the given size.
|
||||
_sz: The number of symbols in this context.
|
||||
_inc: The amount by which to increment the frequency count of an observed
|
||||
symbol.
|
||||
_thresh: The maximum total frequency count allowed before the counts are
|
||||
rescaled.
|
||||
See above for restrictions on this value.
|
||||
_counts: The initial frequency count of each symbol, or NULL to initialize
|
||||
each frequency count to 1.*/
|
||||
void ec_probmod_init_full(ec_probmod *_this,unsigned _sz,unsigned _inc,
|
||||
unsigned _thresh,const unsigned *_counts);
|
||||
/*Frees all memory used by this probability model.*/
|
||||
void ec_probmod_clear(ec_probmod *_this);
|
||||
|
||||
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue