From 006273c59f35ca10f1ab9b4cad36fdd7550b754b Mon Sep 17 00:00:00 2001 From: "Timothy B. Terriberry" Date: Tue, 21 May 2013 18:16:13 -0700 Subject: [PATCH] Use a table for PVQ encoding/decoding. 58.4% speedup (2.4x faster) on test_unit_cwrs32 (no custom modes). Gives a 3.2% speedup on ./opus_demo restricted-lowdelay 48000 2 96000 comp48-stereo.sw /dev/null on a 600 MHz Cortex A8. --- celt/cwrs.c | 615 +++++++++++++++++----------------- celt/modes.c | 8 + celt/tests/test_unit_cwrs32.c | 108 ++---- 3 files changed, 342 insertions(+), 389 deletions(-) diff --git a/celt/cwrs.c b/celt/cwrs.c index 8edc919d..e6a25e92 100644 --- a/celt/cwrs.c +++ b/celt/cwrs.c @@ -71,64 +71,6 @@ int log2_frac(opus_uint32 val, int frac) } #endif -#ifndef SMALL_FOOTPRINT - -#define MASK32 (0xFFFFFFFF) - -/*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/ -static const opus_uint32 INV_TABLE[53]={ - 0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7, - 0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF, - 0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7, - 0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF, - 0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97, - 0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF, - 0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587, - 0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF, - 0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977, - 0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF, - 0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67, - 0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F, - 0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57, - 0xD8FD8FD9, -}; - -/*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact. - _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result - fits in 32 bits, but currently the table for multiplicative inverses is only - valid for _d<=52.*/ -static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b, - opus_uint32 _c,int _d){ - celt_assert(_d<=52); - return (_a*_b-_c)*INV_TABLE[_d]&MASK32; -} - -/*Computes (_a*_b-_c)/_d when the quotient is known to be exact. - _d does not actually have to be even, but imusdiv32odd will be faster when - it's odd, so you should use that instead. - _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the - table for multiplicative inverses is only valid for _d<=54). - _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in - 32 bits.*/ -static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b, - opus_uint32 _c,int _d){ - opus_uint32 inv; - int mask; - int shift; - int one; - celt_assert(_d>0); - celt_assert(_d<=54); - shift=EC_ILOG(_d^(_d-1)); - inv=INV_TABLE[(_d-1)>>shift]; - shift--; - one=1<>shift)-(_c>>shift)+ - ((_a*(_b&mask)+one-(_c&mask))>>shift)-1)*inv&MASK32; -} - -#endif /* SMALL_FOOTPRINT */ - /*Although derived separately, the pulse vector coding scheme is equivalent to a Pyramid Vector Quantizer \cite{Fis86}. Some additional notes about an early version appear at @@ -248,46 +190,301 @@ static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b, year=1986 }*/ -#ifndef SMALL_FOOTPRINT -/*Compute U(2,_k). - Note that this may be called with _k=32768 (maxK[2]+1).*/ -static inline unsigned ucwrs2(unsigned _k){ - celt_assert(_k>0); - return _k+(_k-1); +#if !defined(SMALL_FOOTPRINT) + +/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/ +# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)]) +/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N + with K pulses allocated to it.*/ +# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1)) + +/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)). + Thus, the number of entries in row I is the larger of the maximum number of + pulses we will ever allocate for a given N=I (K=128, or however many fit in + 32 bits, whichever is smaller), plus one, and the maximum N for which + K=I-1 pulses fit in 32 bits. + The largest band size in an Opus Custom mode is 208. + Otherwise, we can limit things to the set of N which can be achieved by + splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48, + 44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/ +#if defined(CUSTOM_MODES) +static const opus_uint32 CELT_PVQ_U_DATA[1488]={ +#else +static const opus_uint32 CELT_PVQ_U_DATA[1272]={ +#endif + /*N=0, K=0...176:*/ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#if defined(CUSTOM_MODES) + /*...208:*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, +#endif + /*N=1, K=1...176:*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +#if defined(CUSTOM_MODES) + /*...208:*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +#endif + /*N=2, K=2...176:*/ + 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, + 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, + 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, + 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143, + 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, + 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, + 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, + 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, + 265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293, + 295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323, + 325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351, +#if defined(CUSTOM_MODES) + /*...208:*/ + 353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381, + 383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411, + 413, 415, +#endif + /*N=3, K=3...176:*/ + 13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613, + 685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861, + 1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785, + 3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385, + 6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661, + 9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961, + 13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745, + 17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013, + 21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765, + 26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001, + 31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721, + 37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925, + 43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613, + 50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785, + 57461, 58141, 58825, 59513, 60205, 60901, 61601, +#if defined(CUSTOM_MODES) + /*...208:*/ + 62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565, + 70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013, + 78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113, +#endif + /*N=4, K=4...176:*/ + 63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017, + 7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775, + 30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153, + 82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193, + 161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575, + 267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217, + 410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951, + 597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609, + 833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023, + 1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407, + 1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759, + 1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175, + 2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751, + 2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583, + 3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767, + 3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399, + 4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575, + 5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391, + 6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943, + 7085049, 7207551, +#if defined(CUSTOM_MODES) + /*...208:*/ + 7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783, + 8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967, + 9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199, + 10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177, + 11912575, +#endif + /*N=5, K=5...176:*/ + 321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041, + 50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401, + 330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241, + 1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241, + 2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801, + 4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849, + 8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849, + 13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809, + 20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881, + 29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641, + 40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081, + 55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609, + 73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049, + 95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641, + 122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041, + 155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321, + 193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969, + 238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889, + 290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401, + 351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241, + 420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561, + 500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929, + 590359041, 604167209, 618216201, 632508801, +#if defined(CUSTOM_MODES) + /*...208:*/ + 647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241, + 755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161, + 878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329, + 1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041, + 1143412929, 1166053121, 1189027881, 1212340489, 1235994241, +#endif + /*N=6, K=6...96:*/ + 1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047, + 335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409, + 2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793, + 11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455, + 29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189, + 64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651, + 128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185, + 235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647, + 402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229, + 655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283, + 1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135, + 1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187, + 2011371957, 2120032959, +#if defined(CUSTOM_MODES) + /*...109:*/ + 2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U, + 3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U, + 4012305913U, +#endif + /*N=7, K=7...54*/ + 8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777, + 1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233, + 19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013, + 88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805, + 292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433, + 793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821, + 1667010073, 1870535785, 2094367717, +#if defined(CUSTOM_MODES) + /*...60:*/ + 2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U, +#endif + /*N=8, K=8...37*/ + 48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767, + 9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017, + 104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351, + 638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615, + 2229491905U, +#if defined(CUSTOM_MODES) + /*...40:*/ + 2691463695U, 3233240945U, 3866006015U, +#endif + /*N=9, K=9...28:*/ + 265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777, + 39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145, + 628496897, 872893441, 1196924561, 1621925137, 2173806145U, +#if defined(CUSTOM_MODES) + /*...29:*/ + 2883810113U, +#endif + /*N=10, K=10...24:*/ + 1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073, + 254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U, + 3375210671U, + /*N=11, K=11...19:*/ + 8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585, + 948062325, 1616336765, +#if defined(CUSTOM_MODES) + /*...20:*/ + 2684641785U, +#endif + /*N=12, K=12...18:*/ + 45046719, 103274625, 224298231, 464387817, 921406335, 1759885185, + 3248227095U, + /*N=13, K=13...16:*/ + 251595969, 579168825, 1267854873, 2653649025U, + /*N=14, K=14:*/ + 1409933619 +}; + +#if defined(CUSTOM_MODES) +const opus_uint32 *const CELT_PVQ_U_ROW[15]={ + CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415, + CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030, + CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389, + CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455, + CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473 +}; +#else +const opus_uint32 *const CELT_PVQ_U_ROW[15]={ + CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351, + CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870, + CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178, + CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240, + CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257 +}; +#endif + +#if defined(CUSTOM_MODES) +void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ + int k; + /*_maxk==0 => there's nothing to do.*/ + celt_assert(_maxk>0); + _bits[0]=0; + for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac); +} +#endif + +static opus_uint32 icwrs(int _n,const int *_y){ + opus_uint32 i; + int j; + int k; + celt_assert(_n>=2); + j=_n-1; + i=_y[j]<0; + k=abs(_y[j]); + do{ + j--; + i+=CELT_PVQ_U(_n-j,k); + k+=abs(_y[j]); + if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1); + } + while(j>0); + return i; } -/*Compute V(2,_k).*/ -static inline opus_uint32 ncwrs2(int _k){ +void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ celt_assert(_k>0); - return 4*(opus_uint32)_k; + ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); } -/*Compute U(3,_k). - Note that this may be called with _k=32768 (maxK[3]+1).*/ -static inline opus_uint32 ucwrs3(unsigned _k){ +static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ celt_assert(_k>0); - return (2*(opus_uint32)_k-2)*_k+1; + celt_assert(_n>0); + do{ + opus_uint32 p; + int s; + int k0; + /*Are the pulses in this dimension negative?*/ + p=CELT_PVQ_U(_n,_k+1); + s=-(_i>=p); + _i-=p&s; + /*Count how many pulses were placed in this dimension.*/ + k0=_k; + for(p=CELT_PVQ_U(_n,_k);p>_i;p=CELT_PVQ_U(_n,_k))_k--; + _i-=p; + *_y++=(k0-_k+s)^s; + } + while(--_n>0); } -/*Compute V(3,_k).*/ -static inline opus_uint32 ncwrs3(int _k){ - celt_assert(_k>0); - return 2*(2*(unsigned)_k*(opus_uint32)_k+1); +void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); } -/*Compute U(4,_k).*/ -static inline opus_uint32 ucwrs4(int _k){ - celt_assert(_k>0); - return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1); -} - -/*Compute V(4,_k).*/ -static inline opus_uint32 ncwrs4(int _k){ - celt_assert(_k>0); - return ((_k*(opus_uint32)_k+2)*_k)/3<<3; -} - -#endif /* SMALL_FOOTPRINT */ +#else /* SMALL_FOOTPRINT */ /*Computes the next row/column of any recurrence that obeys the relation u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1]. @@ -332,125 +529,18 @@ static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ celt_assert(len>=3); _u[0]=0; _u[1]=um2=1; -#ifndef SMALL_FOOTPRINT - /*_k>52 doesn't work in the false branch due to the limits of INV_TABLE, - but _k isn't tested here because k<=52 for n=7*/ - if(_n<=6) -#endif - { - /*If _n==0, _u[0] should be 1 and the rest should be 0.*/ - /*If _n==1, _u[i] should be 1 for i>1.*/ - celt_assert(_n>=2); - /*If _k==0, the following do-while loop will overflow the buffer.*/ - celt_assert(_k>0); - k=2; - do _u[k]=(k<<1)-1; - while(++k=len)break; - _u[k]=um1=imusdiv32odd(n2m1,um2,um1,(k-1)>>1)+um1; - } - } -#endif /* SMALL_FOOTPRINT */ + /*If _n==0, _u[0] should be 1 and the rest should be 0.*/ + /*If _n==1, _u[i] should be 1 for i>1.*/ + celt_assert(_n>=2); + /*If _k==0, the following do-while loop will overflow the buffer.*/ + celt_assert(_k>0); + k=2; + do _u[k]=(k<<1)-1; + while(++k=p); - _i-=p&s; - yj=_k; - _k=(_i+1)>>1; - p=_k?ucwrs2(_k):0; - _i-=p; - yj-=_k; - _y[0]=(yj+s)^s; - cwrsi1(_k,_i,_y+1); -} - -/*Returns the _i'th combination of _k elements (at most 32767) chosen from a - set of size 3 with associated sign bits. - _y: Returns the vector of pulses.*/ -static void cwrsi3(int _k,opus_uint32 _i,int *_y){ - opus_uint32 p; - int s; - int yj; - p=ucwrs3(_k+1U); - s=-(_i>=p); - _i-=p&s; - yj=_k; - /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all - _i<2147418113=U(3,32768)).*/ - _k=_i>0?(isqrt32(2*_i-1)+1)>>1:0; - p=_k?ucwrs3(_k):0; - _i-=p; - yj-=_k; - _y[0]=(yj+s)^s; - cwrsi2(_k,_i,_y+1); -} - -/*Returns the _i'th combination of _k elements (at most 1172) chosen from a set - of size 4 with associated sign bits. - _y: Returns the vector of pulses.*/ -static void cwrsi4(int _k,opus_uint32 _i,int *_y){ - opus_uint32 p; - int s; - int yj; - int kl; - int kr; - p=ucwrs4(_k+1); - s=-(_i>=p); - _i-=p&s; - yj=_k; - /*We could solve a cubic for k here, but the form of the direct solution does - not lend itself well to exact integer arithmetic. - Instead we do a binary search on U(4,K).*/ - kl=0; - kr=_k; - for(;;){ - _k=(kl+kr)>>1; - p=_k?ucwrs4(_k):0; - if(p<_i){ - if(_k>=kr)break; - kl=_k+1; - } - else if(p>_i)kr=_k-1; - else break; - } - _i-=p; - yj-=_k; - _y[0]=(yj+s)^s; - cwrsi3(_k,_i,_y+1); -} - -#endif /* SMALL_FOOTPRINT */ - /*Returns the _i'th combination of _k elements chosen from a set of size _n with associated sign bits. _y: Returns the vector of pulses. @@ -487,55 +577,6 @@ static inline opus_uint32 icwrs1(const int *_y,int *_k){ return _y[0]<0; } -#ifndef SMALL_FOOTPRINT - -/*Returns the index of the given combination of K elements chosen from a set - of size 2 with associated sign bits. - _y: The vector of pulses, whose sum of absolute values is K. - _k: Returns K.*/ -static inline opus_uint32 icwrs2(const int *_y,int *_k){ - opus_uint32 i; - int k; - i=icwrs1(_y+1,&k); - i+=k?ucwrs2(k):0; - k+=abs(_y[0]); - if(_y[0]<0)i+=ucwrs2(k+1U); - *_k=k; - return i; -} - -/*Returns the index of the given combination of K elements chosen from a set - of size 3 with associated sign bits. - _y: The vector of pulses, whose sum of absolute values is K. - _k: Returns K.*/ -static inline opus_uint32 icwrs3(const int *_y,int *_k){ - opus_uint32 i; - int k; - i=icwrs2(_y+1,&k); - i+=k?ucwrs3(k):0; - k+=abs(_y[0]); - if(_y[0]<0)i+=ucwrs3(k+1U); - *_k=k; - return i; -} - -/*Returns the index of the given combination of K elements chosen from a set - of size 4 with associated sign bits. - _y: The vector of pulses, whose sum of absolute values is K. - _k: Returns K.*/ -static inline opus_uint32 icwrs4(const int *_y,int *_k){ - opus_uint32 i; - int k; - i=icwrs3(_y+1,&k); - i+=k?ucwrs4(k):0; - k+=abs(_y[0]); - if(_y[0]<0)i+=ucwrs4(k+1); - *_k=k; - return i; -} - -#endif /* SMALL_FOOTPRINT */ - /*Returns the index of the given combination of K elements chosen from a set of size _n with associated sign bits. _y: The vector of pulses, whose sum of absolute values must be _k. @@ -543,8 +584,8 @@ static inline opus_uint32 icwrs4(const int *_y,int *_k){ static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y, opus_uint32 *_u){ opus_uint32 i; - int j; - int k; + int j; + int k; /*We can't unroll the first two iterations of the loop unless _n>=2.*/ celt_assert(_n>=2); _u[0]=0; @@ -589,57 +630,23 @@ void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ opus_uint32 i; + VARDECL(opus_uint32,u); + opus_uint32 nc; + SAVE_STACK; celt_assert(_k>0); -#ifndef SMALL_FOOTPRINT - switch(_n){ - case 2:{ - i=icwrs2(_y,&_k); - ec_enc_uint(_enc,i,ncwrs2(_k)); - }break; - case 3:{ - i=icwrs3(_y,&_k); - ec_enc_uint(_enc,i,ncwrs3(_k)); - }break; - case 4:{ - i=icwrs4(_y,&_k); - ec_enc_uint(_enc,i,ncwrs4(_k)); - }break; - default: - { -#endif - VARDECL(opus_uint32,u); - opus_uint32 nc; - SAVE_STACK; - ALLOC(u,_k+2U,opus_uint32); - i=icwrs(_n,_k,&nc,_y,u); - ec_enc_uint(_enc,i,nc); - RESTORE_STACK; -#ifndef SMALL_FOOTPRINT - } - break; - } -#endif + ALLOC(u,_k+2U,opus_uint32); + i=icwrs(_n,_k,&nc,_y,u); + ec_enc_uint(_enc,i,nc); + RESTORE_STACK; } -void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec) -{ +void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + VARDECL(opus_uint32,u); + SAVE_STACK; celt_assert(_k>0); -#ifndef SMALL_FOOTPRINT - switch(_n){ - case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break; - case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break; - case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break; - default: - { -#endif - VARDECL(opus_uint32,u); - SAVE_STACK; - ALLOC(u,_k+2U,opus_uint32); - cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); - RESTORE_STACK; -#ifndef SMALL_FOOTPRINT - } - break; - } -#endif + ALLOC(u,_k+2U,opus_uint32); + cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); + RESTORE_STACK; } + +#endif /* SMALL_FOOTPRINT */ diff --git a/celt/modes.c b/celt/modes.c index ed204d7d..42e68e1c 100644 --- a/celt/modes.c +++ b/celt/modes.c @@ -345,6 +345,14 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error) mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands); if (mode->eBands==NULL) goto failure; +#if !defined(SMALL_FOOTPRINT) + /* Make sure we don't allocate a band larger than our PVQ table. + 208 should be enough, but let's be paranoid. */ + if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])< + 208) { + goto failure; + } +#endif mode->effEBands = mode->nbEBands; while (mode->eBands[mode->effEBands] > mode->shortMdctSize) diff --git a/celt/tests/test_unit_cwrs32.c b/celt/tests/test_unit_cwrs32.c index 4695f2dd..ac2a8d12 100644 --- a/celt/tests/test_unit_cwrs32.c +++ b/celt/tests/test_unit_cwrs32.c @@ -53,22 +53,20 @@ #ifdef TEST_CUSTOM_MODES -#define NDIMS (46) +#define NDIMS (44) static const int pn[NDIMS]={ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 26, 28, 30, 32, 36, 40, 44, 48, 52, 56, 60, 64, 72, 80, 88, 96, 104, - 112, 120, 128, 144, 160, 176, 192, 208, 224, - 240 + 112, 120, 128, 144, 160, 176, 192, 208 }; static const int pkmax[NDIMS]={ 128, 128, 128, 128, 88, 52, 36, 26, 22, 18, 16, 15, 13, 12, 12, 11, 10, 9, 9, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, - 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4 + 4, 4, 4, 4, 4, 4, 4, 4 }; #else /* TEST_CUSTOM_MODES */ @@ -97,27 +95,37 @@ int main(void){ for(pseudo=1;pseudo<41;pseudo++) { int k; +#if defined(SMALL_FOOTPRINT) opus_uint32 uu[KMAX+2U]; +#endif opus_uint32 inc; opus_uint32 nc; opus_uint32 i; k=get_pulses(pseudo); if (k>pkmax[t])break; printf("Testing CWRS with N=%i, K=%i...\n",n,k); +#if defined(SMALL_FOOTPRINT) nc=ncwrs_urow(n,k,uu); +#else + nc=CELT_PVQ_V(n,k); +#endif inc=nc/20000; if(inc<1)inc=1; for(i=0;i");*/ +#if defined(SMALL_FOOTPRINT) ii=icwrs(n,k,&v,y,u); +#else + ii=icwrs(n,y); + v=CELT_PVQ_V(n,k); +#endif if(ii!=i){ fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n", (long)ii,(long)i); @@ -139,81 +152,6 @@ int main(void){ (long)v,(long)nc); return 2; } -#ifndef SMALL_FOOTPRINT - if(n==2){ - cwrsi2(k,i,yy); - for(j=0;j<2;j++)if(yy[j]!=y[j]){ - fprintf(stderr,"N=2 pulse vector mismatch ({%i,%i}!={%i,%i}).\n", - yy[0],yy[1],y[0],y[1]); - return 3; - } - ii=icwrs2(yy,&kk); - if(ii!=i){ - fprintf(stderr,"N=2 combination-index mismatch (%lu!=%lu).\n", - (long)ii,(long)i); - return 4; - } - if(kk!=k){ - fprintf(stderr,"N=2 pulse count mismatch (%i,%i).\n",kk,k); - return 5; - } - v=ncwrs2(k); - if(v!=nc){ - fprintf(stderr,"N=2 combination count mismatch (%lu,%lu).\n", - (long)v,(long)nc); - return 6; - } - } - else if(n==3){ - cwrsi3(k,i,yy); - for(j=0;j<3;j++)if(yy[j]!=y[j]){ - fprintf(stderr,"N=3 pulse vector mismatch " - "({%i,%i,%i}!={%i,%i,%i}).\n",yy[0],yy[1],yy[2],y[0],y[1],y[2]); - return 7; - } - ii=icwrs3(yy,&kk); - if(ii!=i){ - fprintf(stderr,"N=3 combination-index mismatch (%lu!=%lu).\n", - (long)ii,(long)i); - return 8; - } - if(kk!=k){ - fprintf(stderr,"N=3 pulse count mismatch (%i!=%i).\n",kk,k); - return 9; - } - v=ncwrs3(k); - if(v!=nc){ - fprintf(stderr,"N=3 combination count mismatch (%lu!=%lu).\n", - (long)v,(long)nc); - return 10; - } - } - else if(n==4){ - cwrsi4(k,i,yy); - for(j=0;j<4;j++)if(yy[j]!=y[j]){ - fprintf(stderr,"N=4 pulse vector mismatch " - "({%i,%i,%i,%i}!={%i,%i,%i,%i}.\n", - yy[0],yy[1],yy[2],yy[3],y[0],y[1],y[2],y[3]); - return 11; - } - ii=icwrs4(yy,&kk); - if(ii!=i){ - fprintf(stderr,"N=4 combination-index mismatch (%lu!=%lu).\n", - (long)ii,(long)i); - return 12; - } - if(kk!=k){ - fprintf(stderr,"N=4 pulse count mismatch (%i!=%i).\n",kk,k); - return 13; - } - v=ncwrs4(k); - if(v!=nc){ - fprintf(stderr,"N=4 combination count mismatch (%lu!=%lu).\n", - (long)v,(long)nc); - return 14; - } - } -#endif /* SMALL_FOOTPRINT */ /*printf(" %6u\n",i);*/ } /*printf("\n");*/