ans: Refill state at the end of the decoding process.

This should have no effect on the bitstream format (note that there is no
corresponding encoder change). It is equivalent to moving the refill from
the top of the decode loop to the bottom of the loop.

This change allows us to:
* Make sure we consume the final renormalization byte after the last
symbol in an ANS partition.
* Move back toward a single renormalization operation for some ANS modes,
since we know the bounds of the state mutation algorithm that took the
state out of the valid range.

Change-Id: Ia80246fd0ed805aa61b913a362546b3f08e4d79c
diff --git a/aom_dsp/ansreader.h b/aom_dsp/ansreader.h
index 05955a2..5855789 100644
--- a/aom_dsp/ansreader.h
+++ b/aom_dsp/ansreader.h
@@ -49,22 +49,24 @@
   AnsP8 p = ANS_P8_PRECISION - p0;
   int s;
   unsigned xp, sp;
-  unsigned state = refill_state(ans, ans->state);
+  unsigned state = ans->state;
   sp = state * p;
   xp = sp / ANS_P8_PRECISION;
   s = (sp & 0xFF) >= p0;
   if (s)
-    ans->state = xp;
+    state = xp;
   else
-    ans->state = state - xp;
+    state -= xp;
+  ans->state = refill_state(ans, state);
   return s;
 }
 
 static INLINE int uabs_read_bit(struct AnsDecoder *ans) {
   int s;
-  unsigned state = refill_state(ans, ans->state);
+  unsigned state = ans->state;
   s = (int)(state & 1);
-  ans->state = state >> 1;
+  state >>= 1;
+  ans->state = refill_state(ans, state);
   return s;
 }
 
@@ -92,11 +94,11 @@
   unsigned rem;
   unsigned quo;
   struct rans_dec_sym sym;
-  ans->state = refill_state(ans, ans->state);
   quo = ans->state / RANS_PRECISION;
   rem = ans->state % RANS_PRECISION;
   fetch_sym(&sym, tab, rem);
   ans->state = quo * sym.prob + rem - sym.cum_prob;
+  ans->state = refill_state(ans, ans->state);
   return sym.val;
 }