daala_ec: Invert the internal state of the decoder
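When CONFIG_EC_SMALLMUL is enabled, the decoder now tracks the
difference between the high end of the current range and the coded
value, minus 1, instead of the difference between the coded value and
the low end of the current range.
This removes one subtraction from the CDF search loop (reducing the
dependency chain for reading from the CDF) at the cost of one
increment and one decrement during renormalization (easily absorbed
by the reorder buffer).
There should be no change in decoded output.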
Change-Id: Ia7905bb8ca7c5d4ab73f23ccc61bcd3432349aa2
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c
index 1a96d73..c539359 100644
--- a/aom_dsp/entdec.c
+++ b/aom_dsp/entdec.c
@@ -88,7 +88,7 @@
s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15);
for (; s >= 0 && bptr < end; s -= 8, bptr++) {
OD_ASSERT(s <= OD_EC_WINDOW_SIZE - 8);
- dif |= (od_ec_window)bptr[0] << s;
+ dif ^= (od_ec_window)bptr[0] << s;
cnt += 8;
}
if (bptr >= end) {
@@ -114,7 +114,12 @@
OD_ASSERT(rng <= 65535U);
d = 16 - OD_ILOG_NZ(rng);
dec->cnt -= d;
+#if CONFIG_EC_SMALLMUL
+ /*This is equivalent to shifting in 1's instead of 0's.*/
+ dec->dif = ((dif + 1) << d) - 1;
+#else
dec->dif = dif << d;
+#endif
dec->rng = rng << d;
if (dec->cnt < 0) od_ec_dec_refill(dec);
return ret;
@@ -132,7 +137,11 @@
dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8);
dec->end = buf + storage;
dec->bptr = buf;
+#if CONFIG_EC_SMALLMUL
+ dec->dif = ((od_ec_window)1 << (OD_EC_WINDOW_SIZE - 1)) - 1;
+#else
dec->dif = 0;
+#endif
dec->rng = 0x8000;
dec->cnt = -15;
dec->error = 0;
@@ -156,10 +165,17 @@
OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
OD_ASSERT(32768U <= r);
#if CONFIG_EC_SMALLMUL
- v = r - ((r >> 8) * (uint32_t)(32768U - fz) >> 7);
+ v = (r >> 8) * (uint32_t)(32768U - fz) >> 7;
+ vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
+ ret = 1;
+ r_new = v;
+ if (dif >= vw) {
+ r_new = r - v;
+ dif -= vw;
+ ret = 0;
+ }
#else
v = fz * (uint32_t)r >> 15;
-#endif
vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
ret = 0;
r_new = v;
@@ -168,6 +184,7 @@
dif -= vw;
ret = 1;
}
+#endif
return od_ec_dec_normalize(dec, dif, r_new, ret);
}
@@ -192,20 +209,31 @@
OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
OD_ASSERT(cdf[nsyms - 1] == 32768U);
OD_ASSERT(32768U <= r);
+#if CONFIG_EC_SMALLMUL
+ c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
+ v = r;
+ ret = -1;
+ do {
+ u = v;
+ v = (r >> 8) * (uint32_t)(32768U - cdf[++ret]) >> 7;
+ } while (c < v);
+ OD_ASSERT(v < u);
+ OD_ASSERT(u <= r);
+ r = u - v;
+ dif -= (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
+#else
c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
v = 0;
ret = -1;
do {
u = v;
-#if CONFIG_EC_SMALLMUL
- v = r - ((r >> 8) * (uint32_t)(32768U - cdf[++ret]) >> 7);
-#else
v = cdf[++ret] * (uint32_t)r >> 15;
-#endif
} while (v <= c);
+ OD_ASSERT(u < v);
OD_ASSERT(v <= r);
r = v - u;
dif -= (od_ec_window)u << (OD_EC_WINDOW_SIZE - 16);
+#endif
return od_ec_dec_normalize(dec, dif, r, ret);
}
diff --git a/aom_dsp/entdec.h b/aom_dsp/entdec.h
index 68c06c4..9bab964 100644
--- a/aom_dsp/entdec.h
+++ b/aom_dsp/entdec.h
@@ -48,7 +48,14 @@
/*The read pointer for the entropy-coded bits.*/
const unsigned char *bptr;
/*The difference between the coded value and the low end of the current
- range.*/
+ range.
+ {EC_SMALLMUL} The difference between the high end of the current range,
+ (low + rng), and the coded value, minus 1.
+ This stores up to OD_EC_WINDOW_SIZE bits of that difference, but the
+ decoder only uses the top 16 bits of the window to decode the next symbol.
+ As we shift up during renormalization, if we don't have enough bits left in
+ the window to fill the top 16, we'll read in more bits of the coded
+ value.*/
od_ec_window dif;
/*The number of values in the current range.*/
uint16_t rng;