Yushin Cho | 77bba8d | 2016-11-04 16:36:56 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved |
| 3 | * |
| 4 | * This source code is subject to the terms of the BSD 2 Clause License and |
| 5 | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| 6 | * was not distributed with this source code in the LICENSE file, you can |
| 7 | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| 8 | * Media Patent License 1.0 was not distributed with this source code in the |
| 9 | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| 10 | */ |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 11 | |
Sebastien Alaiwan | 0d52f12 | 2018-03-06 09:54:23 +0100 | [diff] [blame] | 12 | #include <assert.h> |
Yaowu Xu | 931bc2a | 2016-10-14 13:53:51 -0700 | [diff] [blame] | 13 | #include "aom_dsp/entdec.h" |
Thomas Daede | 837262b | 2017-11-06 20:07:01 -0800 | [diff] [blame] | 14 | #include "aom_dsp/prob.h" |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 15 | |
| 16 | /*A range decoder. |
| 17 | This is an entropy decoder based upon \cite{Mar79}, which is itself a |
| 18 | rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}. |
| 19 | It is very similar to arithmetic encoding, except that encoding is done with |
| 20 | digits in any base, instead of with bits, and so it is faster when using |
| 21 | larger bases (i.e.: a byte). |
| 22 | The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$ |
| 23 | is the base, longer than the theoretical optimum, but to my knowledge there |
| 24 | is no published justification for this claim. |
| 25 | This only seems true when using near-infinite precision arithmetic so that |
| 26 | the process is carried out with no rounding errors. |
| 27 | |
| 28 | An excellent description of implementation details is available at |
| 29 | http://www.arturocampos.com/ac_range.html |
| 30 | A recent work \cite{MNW98} which proposes several changes to arithmetic |
| 31 | encoding for efficiency actually re-discovers many of the principles |
| 32 | behind range encoding, and presents a good theoretical analysis of them. |
| 33 | |
| 34 | End of stream is handled by writing out the smallest number of bits that |
| 35 | ensures that the stream will be correctly decoded regardless of the value of |
| 36 | any subsequent bits. |
| 37 | od_ec_dec_tell() can be used to determine how many bits were needed to decode |
| 38 | all the symbols thus far; other data can be packed in the remaining bits of |
| 39 | the input buffer. |
| 40 | @PHDTHESIS{Pas76, |
| 41 | author="Richard Clark Pasco", |
| 42 | title="Source coding algorithms for fast data compression", |
| 43 | school="Dept. of Electrical Engineering, Stanford University", |
| 44 | address="Stanford, CA", |
| 45 | month=May, |
| 46 | year=1976, |
| 47 | URL="http://www.richpasco.org/scaffdc.pdf" |
| 48 | } |
| 49 | @INPROCEEDINGS{Mar79, |
| 50 | author="Martin, G.N.N.", |
| 51 | title="Range encoding: an algorithm for removing redundancy from a digitised |
| 52 | message", |
| 53 | booktitle="Video & Data Recording Conference", |
| 54 | year=1979, |
| 55 | address="Southampton", |
| 56 | month=Jul, |
| 57 | URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz" |
| 58 | } |
| 59 | @ARTICLE{MNW98, |
| 60 | author="Alistair Moffat and Radford Neal and Ian H. Witten", |
| 61 | title="Arithmetic Coding Revisited", |
| 62 | journal="{ACM} Transactions on Information Systems", |
| 63 | year=1998, |
| 64 | volume=16, |
| 65 | number=3, |
| 66 | pages="256--294", |
| 67 | month=Jul, |
| 68 | URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf" |
| 69 | }*/ |
| 70 | |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 71 | /*This is meant to be a large, positive constant that can still be efficiently |
| 72 | loaded as an immediate (on platforms like ARM, for example). |
| 73 | Even relatively modest values like 100 would work fine.*/ |
| 74 | #define OD_EC_LOTS_OF_BITS (0x4000) |
| 75 | |
Wan-Teh Chang | c8ae568 | 2018-05-08 12:15:36 -0700 | [diff] [blame] | 76 | /*The return value of od_ec_dec_tell does not change across an od_ec_dec_refill |
| 77 | call.*/ |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 78 | static void od_ec_dec_refill(od_ec_dec *dec) { |
| 79 | int s; |
| 80 | od_ec_window dif; |
| 81 | int16_t cnt; |
| 82 | const unsigned char *bptr; |
| 83 | const unsigned char *end; |
| 84 | dif = dec->dif; |
| 85 | cnt = dec->cnt; |
| 86 | bptr = dec->bptr; |
| 87 | end = dec->end; |
| 88 | s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15); |
| 89 | for (; s >= 0 && bptr < end; s -= 8, bptr++) { |
Wan-Teh Chang | ed997ab | 2018-08-14 12:35:17 -0700 | [diff] [blame] | 90 | /*Each time a byte is inserted into the window (dif), bptr advances and cnt |
| 91 | is incremented by 8, so the total number of consumed bits (the return |
| 92 | value of od_ec_dec_tell) does not change.*/ |
Sebastien Alaiwan | 0d52f12 | 2018-03-06 09:54:23 +0100 | [diff] [blame] | 93 | assert(s <= OD_EC_WINDOW_SIZE - 8); |
Timothy B. Terriberry | 881f109 | 2017-03-07 20:03:09 -0800 | [diff] [blame] | 94 | dif ^= (od_ec_window)bptr[0] << s; |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 95 | cnt += 8; |
| 96 | } |
| 97 | if (bptr >= end) { |
Wan-Teh Chang | ed997ab | 2018-08-14 12:35:17 -0700 | [diff] [blame] | 98 | /*We've reached the end of the buffer. It is perfectly valid for us to need |
| 99 | to fill the window with additional bits past the end of the buffer (and |
| 100 | this happens in normal operation). These bits should all just be taken |
| 101 | as zero. But we cannot increment bptr past 'end' (this is undefined |
| 102 | behavior), so we start to increment dec->tell_offs. We also don't want |
| 103 | to keep testing bptr against 'end', so we set cnt to OD_EC_LOTS_OF_BITS |
| 104 | and adjust dec->tell_offs so that the total number of unconsumed bits in |
| 105 | the window (dec->cnt - dec->tell_offs) does not change. This effectively |
| 106 | puts lots of zero bits into the window, and means we won't try to refill |
| 107 | it from the buffer for a very long time (at which point we'll put lots |
| 108 | of zero bits into the window again).*/ |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 109 | dec->tell_offs += OD_EC_LOTS_OF_BITS - cnt; |
| 110 | cnt = OD_EC_LOTS_OF_BITS; |
| 111 | } |
| 112 | dec->dif = dif; |
| 113 | dec->cnt = cnt; |
| 114 | dec->bptr = bptr; |
| 115 | } |
| 116 | |
| 117 | /*Takes updated dif and range values, renormalizes them so that |
| 118 | 32768 <= rng < 65536 (reading more bytes from the stream into dif if |
| 119 | necessary), and stores them back in the decoder context. |
| 120 | dif: The new value of dif. |
| 121 | rng: The new value of the range. |
| 122 | ret: The value to return. |
| 123 | Return: ret. |
| 124 | This allows the compiler to jump to this function via a tail-call.*/ |
Michael Bebenita | 63b44c4 | 2016-08-23 16:03:39 -0700 | [diff] [blame] | 125 | static int od_ec_dec_normalize(od_ec_dec *dec, od_ec_window dif, unsigned rng, |
| 126 | int ret) { |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 127 | int d; |
Sebastien Alaiwan | 0d52f12 | 2018-03-06 09:54:23 +0100 | [diff] [blame] | 128 | assert(rng <= 65535U); |
Wan-Teh Chang | ed997ab | 2018-08-14 12:35:17 -0700 | [diff] [blame] | 129 | /*The number of leading zeros in the 16-bit binary representation of rng.*/ |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 130 | d = 16 - OD_ILOG_NZ(rng); |
Wan-Teh Chang | ed997ab | 2018-08-14 12:35:17 -0700 | [diff] [blame] | 131 | /*d bits in dec->dif are consumed.*/ |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 132 | dec->cnt -= d; |
Timothy B. Terriberry | 881f109 | 2017-03-07 20:03:09 -0800 | [diff] [blame] | 133 | /*This is equivalent to shifting in 1's instead of 0's.*/ |
| 134 | dec->dif = ((dif + 1) << d) - 1; |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 135 | dec->rng = rng << d; |
| 136 | if (dec->cnt < 0) od_ec_dec_refill(dec); |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 137 | return ret; |
| 138 | } |
| 139 | |
| 140 | /*Initializes the decoder. |
| 141 | buf: The input buffer to use. |
Wan-Teh Chang | c0b290f | 2018-08-15 16:12:56 -0700 | [diff] [blame] | 142 | storage: The size in bytes of the input buffer.*/ |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 143 | void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf, |
| 144 | uint32_t storage) { |
| 145 | dec->buf = buf; |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 146 | dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8); |
| 147 | dec->end = buf + storage; |
| 148 | dec->bptr = buf; |
Timothy B. Terriberry | 881f109 | 2017-03-07 20:03:09 -0800 | [diff] [blame] | 149 | dec->dif = ((od_ec_window)1 << (OD_EC_WINDOW_SIZE - 1)) - 1; |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 150 | dec->rng = 0x8000; |
| 151 | dec->cnt = -15; |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 152 | od_ec_dec_refill(dec); |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 153 | } |
| 154 | |
Timothy B. Terriberry | ead5287 | 2017-03-07 20:27:34 -0800 | [diff] [blame] | 155 | /*Decode a single binary value. |
Timothy B. Terriberry | f9ef4f6 | 2017-08-25 11:24:18 -0700 | [diff] [blame] | 156 | f: The probability that the bit is one, scaled by 32768. |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 157 | Return: The value decoded (0 or 1).*/ |
Timothy B. Terriberry | ead5287 | 2017-03-07 20:27:34 -0800 | [diff] [blame] | 158 | int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned f) { |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 159 | od_ec_window dif; |
| 160 | od_ec_window vw; |
| 161 | unsigned r; |
Nathan E. Egge | 5357dca | 2016-09-09 14:21:56 -0400 | [diff] [blame] | 162 | unsigned r_new; |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 163 | unsigned v; |
| 164 | int ret; |
Sebastien Alaiwan | 0d52f12 | 2018-03-06 09:54:23 +0100 | [diff] [blame] | 165 | assert(0 < f); |
| 166 | assert(f < 32768U); |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 167 | dif = dec->dif; |
| 168 | r = dec->rng; |
Sebastien Alaiwan | 0d52f12 | 2018-03-06 09:54:23 +0100 | [diff] [blame] | 169 | assert(dif >> (OD_EC_WINDOW_SIZE - 16) < r); |
| 170 | assert(32768U <= r); |
Thomas Davies | 736ddef | 2017-11-09 09:46:08 +0000 | [diff] [blame] | 171 | v = ((r >> 8) * (uint32_t)(f >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT)); |
Jonathan Matthews | 9ade394 | 2017-11-23 08:44:07 +0000 | [diff] [blame] | 172 | v += EC_MIN_PROB; |
Timothy B. Terriberry | 881f109 | 2017-03-07 20:03:09 -0800 | [diff] [blame] | 173 | vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16); |
| 174 | ret = 1; |
| 175 | r_new = v; |
| 176 | if (dif >= vw) { |
| 177 | r_new = r - v; |
| 178 | dif -= vw; |
| 179 | ret = 0; |
| 180 | } |
Nathan E. Egge | 5357dca | 2016-09-09 14:21:56 -0400 | [diff] [blame] | 181 | return od_ec_dec_normalize(dec, dif, r_new, ret); |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 182 | } |
| 183 | |
Timothy B. Terriberry | f9ef4f6 | 2017-08-25 11:24:18 -0700 | [diff] [blame] | 184 | /*Decodes a symbol given an inverse cumulative distribution function (CDF) |
| 185 | table in Q15. |
Thomas Daede | 837262b | 2017-11-06 20:07:01 -0800 | [diff] [blame] | 186 | icdf: CDF_PROB_TOP minus the CDF, such that symbol s falls in the range |
| 187 | [s > 0 ? (CDF_PROB_TOP - icdf[s - 1]) : 0, CDF_PROB_TOP - icdf[s]). |
Timothy B. Terriberry | f9ef4f6 | 2017-08-25 11:24:18 -0700 | [diff] [blame] | 188 | The values must be monotonically non-increasing, and icdf[nsyms - 1] |
| 189 | must be 0. |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 190 | nsyms: The number of symbols in the alphabet. |
| 191 | This should be at most 16. |
| 192 | Return: The decoded symbol s.*/ |
Timothy B. Terriberry | f9ef4f6 | 2017-08-25 11:24:18 -0700 | [diff] [blame] | 193 | int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *icdf, int nsyms) { |
Timothy B. Terriberry | 561eb7c | 2017-03-07 18:06:44 -0800 | [diff] [blame] | 194 | od_ec_window dif; |
| 195 | unsigned r; |
| 196 | unsigned c; |
| 197 | unsigned u; |
| 198 | unsigned v; |
| 199 | int ret; |
| 200 | (void)nsyms; |
| 201 | dif = dec->dif; |
| 202 | r = dec->rng; |
Thomas Davies | 736ddef | 2017-11-09 09:46:08 +0000 | [diff] [blame] | 203 | const int N = nsyms - 1; |
| 204 | |
Sebastien Alaiwan | 0d52f12 | 2018-03-06 09:54:23 +0100 | [diff] [blame] | 205 | assert(dif >> (OD_EC_WINDOW_SIZE - 16) < r); |
| 206 | assert(icdf[nsyms - 1] == OD_ICDF(CDF_PROB_TOP)); |
| 207 | assert(32768U <= r); |
| 208 | assert(7 - EC_PROB_SHIFT - CDF_SHIFT >= 0); |
Timothy B. Terriberry | 881f109 | 2017-03-07 20:03:09 -0800 | [diff] [blame] | 209 | c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16)); |
| 210 | v = r; |
| 211 | ret = -1; |
| 212 | do { |
| 213 | u = v; |
Thomas Davies | 736ddef | 2017-11-09 09:46:08 +0000 | [diff] [blame] | 214 | v = ((r >> 8) * (uint32_t)(icdf[++ret] >> EC_PROB_SHIFT) >> |
Thomas Daede | 837262b | 2017-11-06 20:07:01 -0800 | [diff] [blame] | 215 | (7 - EC_PROB_SHIFT - CDF_SHIFT)); |
Thomas Davies | 736ddef | 2017-11-09 09:46:08 +0000 | [diff] [blame] | 216 | v += EC_MIN_PROB * (N - ret); |
Timothy B. Terriberry | 881f109 | 2017-03-07 20:03:09 -0800 | [diff] [blame] | 217 | } while (c < v); |
Sebastien Alaiwan | 0d52f12 | 2018-03-06 09:54:23 +0100 | [diff] [blame] | 218 | assert(v < u); |
| 219 | assert(u <= r); |
Timothy B. Terriberry | 881f109 | 2017-03-07 20:03:09 -0800 | [diff] [blame] | 220 | r = u - v; |
| 221 | dif -= (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16); |
Timothy B. Terriberry | 561eb7c | 2017-03-07 18:06:44 -0800 | [diff] [blame] | 222 | return od_ec_dec_normalize(dec, dif, r, ret); |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 223 | } |
| 224 | |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 225 | /*Returns the number of bits "used" by the decoded symbols so far. |
| 226 | This same number can be computed in either the encoder or the decoder, and is |
| 227 | suitable for making coding decisions. |
| 228 | Return: The number of bits. |
| 229 | This will always be slightly larger than the exact value (e.g., all |
| 230 | rounding error is in the positive direction).*/ |
Nathan E. Egge | 19698a7 | 2016-08-18 02:34:53 -0400 | [diff] [blame] | 231 | int od_ec_dec_tell(const od_ec_dec *dec) { |
Wan-Teh Chang | ed997ab | 2018-08-14 12:35:17 -0700 | [diff] [blame] | 232 | /*There is a window of bits stored in dec->dif. The difference |
| 233 | (dec->bptr - dec->buf) tells us how many bytes have been read into this |
| 234 | window. The difference (dec->cnt - dec->tell_offs) tells us how many of |
| 235 | the bits in that window remain unconsumed.*/ |
Wan-Teh Chang | c8ae568 | 2018-05-08 12:15:36 -0700 | [diff] [blame] | 236 | return (int)((dec->bptr - dec->buf) * 8 - dec->cnt + dec->tell_offs); |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 237 | } |
| 238 | |
| 239 | /*Returns the number of bits "used" by the decoded symbols so far. |
| 240 | This same number can be computed in either the encoder or the decoder, and is |
| 241 | suitable for making coding decisions. |
| 242 | Return: The number of bits scaled by 2**OD_BITRES. |
| 243 | This will always be slightly larger than the exact value (e.g., all |
| 244 | rounding error is in the positive direction).*/ |
Nathan E. Egge | 19698a7 | 2016-08-18 02:34:53 -0400 | [diff] [blame] | 245 | uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec) { |
Nathan E. Egge | 1078dee | 2016-03-06 10:59:29 -0500 | [diff] [blame] | 246 | return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng); |
| 247 | } |