ans: Use a subnormal initial state

Starting the coder from a subnormal state saves the cost of encoding one
symbol per partition. In practice that symbol is currently usually the
last DCT token sign, so the saving is one bit. Most of the time the gain
isn't realized, but occasionally it makes the final state one byte
smaller or saves one renormalization.

Change-Id: Ibe5dc5d3471f51f932164b32e3bf28fb7b914805
diff --git a/aom_dsp/ansreader.h b/aom_dsp/ansreader.h
index 16fc3c8..faddf42 100644
--- a/aom_dsp/ansreader.h
+++ b/aom_dsp/ansreader.h
@@ -69,7 +69,7 @@
     ans->symbols_left--;
   }
 #endif
-  unsigned state = ans->state;
+  unsigned state = refill_state(ans, ans->state);
   const unsigned quotient = state / ANS_P8_PRECISION;
   const unsigned remainder = state % ANS_P8_PRECISION;
   const int value = remainder >= p0;
@@ -77,7 +77,7 @@
     state = quotient * (ANS_P8_PRECISION - p0) + remainder - p0;
   else
     state = quotient * p0 + remainder;
-  ans->state = refill_state(ans, state);
+  ans->state = state;
   return value;
 }
 
@@ -119,11 +119,11 @@
     ans->symbols_left--;
   }
 #endif
+  ans->state = refill_state(ans, ans->state);
   quo = ans->state / RANS_PRECISION;
   rem = ans->state % RANS_PRECISION;
   fetch_sym(&sym, tab, rem);
   ans->state = quo * sym.prob + rem - sym.cum_prob;
-  ans->state = refill_state(ans, ans->state);
   return sym.val;
 }
 
@@ -193,12 +193,12 @@
 }
 #endif
 
-static INLINE int ans_read_end(struct AnsDecoder *const ans) {
-  return ans->state == L_BASE;
+static INLINE int ans_read_end(const struct AnsDecoder *const ans) {
+  return ans->buf_offset == 0 && ans->state < L_BASE;
 }
 
 static INLINE int ans_reader_has_error(const struct AnsDecoder *const ans) {
-  return ans->state < L_BASE && ans->buf_offset == 0;
+  return ans->state < L_BASE / RANS_PRECISION;
 }
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/aom_dsp/buf_ans.c b/aom_dsp/buf_ans.c
index 01a95b0..80948cc 100644
--- a/aom_dsp/buf_ans.c
+++ b/aom_dsp/buf_ans.c
@@ -50,7 +50,15 @@
   if (c->offset == 0) return;
 #endif
   assert(c->offset > 0);
-  for (offset = c->offset - 1; offset >= 0; --offset) {
+  offset = c->offset - 1;
+  // Code the first symbol such that it brings the state to the smallest normal
+  // state from an initial state that would have been a subnormal/refill state.
+  if (c->buf[offset].method == ANS_METHOD_RANS) {
+    c->ans.state += c->buf[offset].val_start;
+  } else {
+    c->ans.state += c->buf[offset].val_start ? c->buf[offset].prob : 0;
+  }
+  for (offset = offset - 1; offset >= 0; --offset) {
     if (c->buf[offset].method == ANS_METHOD_RANS) {
       struct rans_sym sym;
       sym.prob = c->buf[offset].prob;