Merge "optimize decode_bool operation"
diff --git a/configure b/configure
index 621161c..729c986 100755
--- a/configure
+++ b/configure
@@ -249,7 +249,6 @@
     unistd_h
 "
 EXPERIMENT_LIST="
-    oneshotq
     multiple_arf
     non420
     alpha
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 7c07975..4b60cfd 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3574,7 +3574,8 @@
                 for (i=cpi->current_layer+1; i<cpi->oxcf.number_of_layers; i++)
                 {
                     LAYER_CONTEXT *lc = &cpi->layer_context[i];
-                    lc->bits_off_target += cpi->av_per_frame_bandwidth;
+                    lc->bits_off_target += (int)(lc->target_bandwidth /
+                                                 lc->framerate);
                     if (lc->bits_off_target > lc->maximum_buffer_size)
                         lc->bits_off_target = lc->maximum_buffer_size;
                     lc->buffer_level = lc->bits_off_target;
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 1e8259c..fe4db13 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -956,6 +956,21 @@
             if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size)
               cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size;
             cpi->buffer_level = cpi->bits_off_target;
+
+            if (cpi->oxcf.number_of_layers > 1) {
+              unsigned int i;
+
+              // Propagate bits saved by dropping the frame to higher layers.
+              for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers;
+                  i++) {
+                LAYER_CONTEXT *lc = &cpi->layer_context[i];
+                lc->bits_off_target += (int)(lc->target_bandwidth /
+                                             lc->framerate);
+                if (lc->bits_off_target > lc->maximum_buffer_size)
+                  lc->bits_off_target = lc->maximum_buffer_size;
+                lc->buffer_level = lc->bits_off_target;
+              }
+            }
         }
     }
 
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index c4d7c38..a963d55 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -350,23 +350,15 @@
 #define COUNT_SAT 20
 #define MAX_UPDATE_FACTOR 128
 
-static int update_ct(vp9_prob pre_prob, const unsigned int ct[2]) {
+static int adapt_prob(vp9_prob pre_prob, const unsigned int ct[2]) {
   return merge_probs(pre_prob, ct, COUNT_SAT, MAX_UPDATE_FACTOR);
 }
 
-static void update_mode_probs(int n_modes,
-                              const vp9_tree_index *tree,
-                              const unsigned int *cnt,
-                              const vp9_prob *pre_probs, vp9_prob *dst_probs,
-                              unsigned int tok0_offset) {
-#define MAX_PROBS 32
-  unsigned int branch_ct[MAX_PROBS][2];
-  int t;
-
-  assert(n_modes - 1 < MAX_PROBS);
-  vp9_tree_probs_from_distribution(tree, branch_ct, cnt, tok0_offset);
-  for (t = 0; t < n_modes - 1; ++t)
-    dst_probs[t] = update_ct(pre_probs[t], branch_ct[t]);
+static void adapt_probs(const vp9_tree_index *tree,
+                        const vp9_prob *pre_probs, const unsigned int *counts,
+                        unsigned int offset, vp9_prob *probs) {
+  tree_merge_probs(tree, pre_probs, counts, offset,
+                   COUNT_SAT, MAX_UPDATE_FACTOR, probs);
 }
 
 void vp9_adapt_mode_probs(VP9_COMMON *cm) {
@@ -376,44 +368,40 @@
   const FRAME_COUNTS *counts = &cm->counts;
 
   for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
-    fc->intra_inter_prob[i] = update_ct(pre_fc->intra_inter_prob[i],
-                                        counts->intra_inter[i]);
+    fc->intra_inter_prob[i] = adapt_prob(pre_fc->intra_inter_prob[i],
+                                         counts->intra_inter[i]);
   for (i = 0; i < COMP_INTER_CONTEXTS; i++)
-    fc->comp_inter_prob[i] = update_ct(pre_fc->comp_inter_prob[i],
-                                       counts->comp_inter[i]);
+    fc->comp_inter_prob[i] = adapt_prob(pre_fc->comp_inter_prob[i],
+                                        counts->comp_inter[i]);
   for (i = 0; i < REF_CONTEXTS; i++)
-    fc->comp_ref_prob[i] = update_ct(pre_fc->comp_ref_prob[i],
-                                     counts->comp_ref[i]);
+    fc->comp_ref_prob[i] = adapt_prob(pre_fc->comp_ref_prob[i],
+                                      counts->comp_ref[i]);
   for (i = 0; i < REF_CONTEXTS; i++)
     for (j = 0; j < 2; j++)
-      fc->single_ref_prob[i][j] = update_ct(pre_fc->single_ref_prob[i][j],
-                                            counts->single_ref[i][j]);
+      fc->single_ref_prob[i][j] = adapt_prob(pre_fc->single_ref_prob[i][j],
+                                             counts->single_ref[i][j]);
 
   for (i = 0; i < INTER_MODE_CONTEXTS; i++)
-    update_mode_probs(INTER_MODES, vp9_inter_mode_tree,
-                      counts->inter_mode[i], pre_fc->inter_mode_probs[i],
-                      fc->inter_mode_probs[i], NEARESTMV);
+    adapt_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i],
+                counts->inter_mode[i], NEARESTMV, fc->inter_mode_probs[i]);
 
   for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
-    update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
-                      counts->y_mode[i], pre_fc->y_mode_prob[i],
-                      fc->y_mode_prob[i], 0);
+    adapt_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i],
+                counts->y_mode[i], 0, fc->y_mode_prob[i]);
 
   for (i = 0; i < INTRA_MODES; ++i)
-    update_mode_probs(INTRA_MODES, vp9_intra_mode_tree,
-                      counts->uv_mode[i], pre_fc->uv_mode_prob[i],
-                      fc->uv_mode_prob[i], 0);
+    adapt_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i],
+                counts->uv_mode[i], 0, fc->uv_mode_prob[i]);
 
   for (i = 0; i < PARTITION_CONTEXTS; i++)
-    update_mode_probs(PARTITION_TYPES, vp9_partition_tree, counts->partition[i],
-                      pre_fc->partition_prob[i], fc->partition_prob[i], 0);
+    adapt_probs(vp9_partition_tree, pre_fc->partition_prob[i],
+                counts->partition[i], 0, fc->partition_prob[i]);
 
   if (cm->mcomp_filter_type == SWITCHABLE) {
     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
-      update_mode_probs(SWITCHABLE_FILTERS, vp9_switchable_interp_tree,
-                        counts->switchable_interp[i],
-                        pre_fc->switchable_interp_prob[i],
-                        fc->switchable_interp_prob[i], 0);
+      adapt_probs(vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i],
+                  counts->switchable_interp[i], 0,
+                  fc->switchable_interp_prob[i]);
   }
 
   if (cm->tx_mode == TX_MODE_SELECT) {
@@ -425,23 +413,24 @@
     for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
       tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p);
       for (j = 0; j < TX_SIZES - 3; ++j)
-        fc->tx_probs.p8x8[i][j] = update_ct(pre_fc->tx_probs.p8x8[i][j],
-                                            branch_ct_8x8p[j]);
+        fc->tx_probs.p8x8[i][j] = adapt_prob(pre_fc->tx_probs.p8x8[i][j],
+                                             branch_ct_8x8p[j]);
 
       tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p);
       for (j = 0; j < TX_SIZES - 2; ++j)
-        fc->tx_probs.p16x16[i][j] = update_ct(pre_fc->tx_probs.p16x16[i][j],
-                                              branch_ct_16x16p[j]);
+        fc->tx_probs.p16x16[i][j] = adapt_prob(pre_fc->tx_probs.p16x16[i][j],
+                                               branch_ct_16x16p[j]);
 
       tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p);
       for (j = 0; j < TX_SIZES - 1; ++j)
-        fc->tx_probs.p32x32[i][j] = update_ct(pre_fc->tx_probs.p32x32[i][j],
-                                              branch_ct_32x32p[j]);
+        fc->tx_probs.p32x32[i][j] = adapt_prob(pre_fc->tx_probs.p32x32[i][j],
+                                               branch_ct_32x32p[j]);
     }
   }
 
   for (i = 0; i < MBSKIP_CONTEXTS; ++i)
-    fc->mbskip_probs[i] = update_ct(pre_fc->mbskip_probs[i], counts->mbskip[i]);
+    fc->mbskip_probs[i] = adapt_prob(pre_fc->mbskip_probs[i],
+                                     counts->mbskip[i]);
 }
 
 static void set_default_lf_deltas(struct loopfilter *lf) {
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 3ebb701..b061cdb 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -194,57 +194,44 @@
   return merge_probs(prep, ct, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR);
 }
 
-static unsigned int adapt_probs(unsigned int i,
-                                vp9_tree tree,
-                                vp9_prob this_probs[],
-                                const vp9_prob last_probs[],
-                                const unsigned int num_events[]) {
-  const unsigned int left = tree[i] <= 0
-          ? num_events[-tree[i]]
-          : adapt_probs(tree[i], tree, this_probs, last_probs, num_events);
-
-  const unsigned int right = tree[i + 1] <= 0
-          ? num_events[-tree[i + 1]]
-          : adapt_probs(tree[i + 1], tree, this_probs, last_probs, num_events);
-  const unsigned int ct[2] = { left, right };
-  this_probs[i >> 1] = adapt_prob(last_probs[i >> 1], ct);
-  return left + right;
+static void adapt_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
+                        const unsigned int *counts, vp9_prob *probs) {
+  tree_merge_probs(tree, pre_probs, counts, 0,
+                   MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR, probs);
 }
 
-
 void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
   int i, j;
 
-  const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
+  nmv_context *fc = &cm->fc.nmvc;
+  const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
+  const nmv_context_counts *counts = &cm->counts.mv;
 
-  nmv_context *ctx = &cm->fc.nmvc;
-  const nmv_context *pre_ctx = &pre_fc->nmvc;
-  const nmv_context_counts *cts = &cm->counts.mv;
-
-  adapt_probs(0, vp9_mv_joint_tree, ctx->joints, pre_ctx->joints, cts->joints);
+  adapt_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints,
+                     fc->joints);
 
   for (i = 0; i < 2; ++i) {
-    ctx->comps[i].sign = adapt_prob(pre_ctx->comps[i].sign, cts->comps[i].sign);
-    adapt_probs(0, vp9_mv_class_tree, ctx->comps[i].classes,
-                pre_ctx->comps[i].classes, cts->comps[i].classes);
-    adapt_probs(0, vp9_mv_class0_tree, ctx->comps[i].class0,
-                pre_ctx->comps[i].class0, cts->comps[i].class0);
+    nmv_component *comp = &fc->comps[i];
+    const nmv_component *pre_comp = &pre_fc->comps[i];
+    const nmv_component_counts *c = &counts->comps[i];
+
+    comp->sign = adapt_prob(pre_comp->sign, c->sign);
+    adapt_probs(vp9_mv_class_tree, pre_comp->classes, c->classes,
+                comp->classes);
+    adapt_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, comp->class0);
 
     for (j = 0; j < MV_OFFSET_BITS; ++j)
-        ctx->comps[i].bits[j] = adapt_prob(pre_ctx->comps[i].bits[j],
-                                           cts->comps[i].bits[j]);
+      comp->bits[j] = adapt_prob(pre_comp->bits[j], c->bits[j]);
 
     for (j = 0; j < CLASS0_SIZE; ++j)
-      adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].class0_fp[j],
-                  pre_ctx->comps[i].class0_fp[j], cts->comps[i].class0_fp[j]);
+      adapt_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], c->class0_fp[j],
+                  comp->class0_fp[j]);
 
-    adapt_probs(0, vp9_mv_fp_tree, ctx->comps[i].fp, pre_ctx->comps[i].fp,
-                cts->comps[i].fp);
+    adapt_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp);
 
     if (allow_hp) {
-      ctx->comps[i].class0_hp = adapt_prob(pre_ctx->comps[i].class0_hp,
-                                           cts->comps[i].class0_hp);
-      ctx->comps[i].hp = adapt_prob(pre_ctx->comps[i].hp, cts->comps[i].hp);
+      comp->class0_hp = adapt_prob(pre_comp->class0_hp, c->class0_hp);
+      comp->hp = adapt_prob(pre_comp->hp, c->hp);
     }
   }
 }
diff --git a/vp9/common/vp9_treecoder.h b/vp9/common/vp9_treecoder.h
index 3cc9ce1..9c776d6 100644
--- a/vp9/common/vp9_treecoder.h
+++ b/vp9/common/vp9_treecoder.h
@@ -91,5 +91,60 @@
   return weighted_prob(pre_prob, prob, factor);
 }
 
+// Recursively adapts the probabilities of the subtree rooted at tree[i].
+//
+// tree[i] and tree[i + 1] are the left and right children of the node: a
+// value <= 0 is a leaf holding a negated token, a positive value is the
+// index of a child node. The count of leaf token t is counts[t - offset].
+// The node's probability is written to probs[i >> 1] by merging
+// pre_probs[i >> 1] with the node's { left, right } counts.
+//
+// Returns the summed count of every leaf below the node.
+static unsigned int tree_merge_probs_impl(unsigned int i,
+                                          const vp9_tree_index *tree,
+                                          const vp9_prob *pre_probs,
+                                          const unsigned int *counts,
+                                          int offset,
+                                          unsigned int count_sat,
+                                          unsigned int max_update_factor,
+                                          vp9_prob *probs) {
+  // Index with (-leaf - offset) instead of pre-biasing the counts pointer:
+  // forming &counts[-offset] would point before the start of the array,
+  // which is undefined behavior in C even if it is never dereferenced.
+  const int l = tree[i];
+  const unsigned int left_count = (l <= 0)
+                 ? counts[-l - offset]
+                 : tree_merge_probs_impl(l, tree, pre_probs, counts, offset,
+                                         count_sat, max_update_factor, probs);
+  const int r = tree[i + 1];
+  const unsigned int right_count = (r <= 0)
+                 ? counts[-r - offset]
+                 : tree_merge_probs_impl(r, tree, pre_probs, counts, offset,
+                                         count_sat, max_update_factor, probs);
+  const unsigned int ct[2] = { left_count, right_count };
+  probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct,
+                              count_sat, max_update_factor);
+  return left_count + right_count;
+}
+
+// Adapts every probability of a token tree from observed token counts.
+//
+// tree:              tree to walk, starting at its root (index 0).
+// pre_probs:         probabilities in effect before adaptation.
+// counts:            per-token counts; counts[0] is the count of the token
+//                    whose value is offset.
+// offset:            token value that counts[0] corresponds to.
+// count_sat,
+// max_update_factor: forwarded unchanged to merge_probs().
+// probs:             receives the adapted probabilities.
+static void tree_merge_probs(const vp9_tree_index *tree,
+                             const vp9_prob *pre_probs,
+                             const unsigned int *counts, int offset,
+                             unsigned int count_sat,
+                             unsigned int max_update_factor, vp9_prob *probs) {
+  tree_merge_probs_impl(0, tree, pre_probs, counts, offset,
+                        count_sat, max_update_factor, probs);
+}
+
 
 #endif  // VP9_COMMON_VP9_TREECODER_H_
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 63b889d..1fd9e97 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -41,6 +41,7 @@
   VP9_COMMON *cm;
   vp9_reader bit_reader;
   DECLARE_ALIGNED(16, MACROBLOCKD, xd);
+  DECLARE_ALIGNED(16, unsigned char, token_cache[1024]);
 } TileWorkerData;
 
 static int read_be32(const uint8_t *p) {
@@ -297,6 +298,7 @@
   VP9_COMMON *cm;
   MACROBLOCKD *xd;
   vp9_reader *r;
+  unsigned char* token_cache;
 };
 
 static void predict_and_reconstruct_intra_block(int plane, int block,
@@ -326,7 +328,7 @@
 
   if (!mi->mbmi.skip_coeff) {
     vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size,
-                            args->r);
+                            args->r, args->token_cache);
     inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
   }
 }
@@ -336,6 +338,7 @@
   MACROBLOCKD *xd;
   vp9_reader *r;
   int *eobtotal;
+  unsigned char* token_cache;
 };
 
 static void reconstruct_inter_block(int plane, int block,
@@ -346,7 +349,8 @@
   MACROBLOCKD *const xd = args->xd;
 
   *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block,
-                                             plane_bsize, tx_size, args->r);
+                                             plane_bsize, tx_size,
+                                             args->r, args->token_cache);
   inverse_transform_block(xd, plane, block, plane_bsize, tx_size);
 }
 
@@ -398,7 +402,8 @@
 static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                            const TileInfo *const tile,
                            int mi_row, int mi_col,
-                           vp9_reader *r, BLOCK_SIZE bsize) {
+                           vp9_reader *r, BLOCK_SIZE bsize,
+                           unsigned char *token_cache) {
   const int less8x8 = bsize < BLOCK_8X8;
   MB_MODE_INFO *mbmi;
 
@@ -420,7 +425,7 @@
   }
 
   if (!is_inter_block(mbmi)) {
-    struct intra_args arg = { cm, xd, r };
+    struct intra_args arg = { cm, xd, r, token_cache };
     foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block,
                               &arg);
   } else {
@@ -438,7 +443,7 @@
     // Reconstruction
     if (!mbmi->skip_coeff) {
       int eobtotal = 0;
-      struct inter_args arg = { cm, xd, r, &eobtotal };
+      struct inter_args arg = { cm, xd, r, &eobtotal, token_cache };
       foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
       if (!less8x8 && eobtotal == 0)
         mbmi->skip_coeff = 1;  // skip loopfilter
@@ -477,7 +482,8 @@
 static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                             const TileInfo *const tile,
                             int mi_row, int mi_col,
-                            vp9_reader* r, BLOCK_SIZE bsize) {
+                            vp9_reader* r, BLOCK_SIZE bsize,
+                            unsigned char *token_cache) {
   const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
   PARTITION_TYPE partition;
   BLOCK_SIZE subsize;
@@ -488,27 +494,33 @@
   partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
   subsize = get_subsize(bsize, partition);
   if (subsize < BLOCK_8X8) {
-    decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
+    decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
   } else {
     switch (partition) {
       case PARTITION_NONE:
-        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
+        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
         break;
       case PARTITION_HORZ:
-        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
+        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
         if (mi_row + hbs < cm->mi_rows)
-          decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
+          decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
+                         token_cache);
         break;
       case PARTITION_VERT:
-        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize);
+        decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize, token_cache);
         if (mi_col + hbs < cm->mi_cols)
-          decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
+          decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
+                         token_cache);
         break;
       case PARTITION_SPLIT:
-        decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize);
-        decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize);
-        decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize);
-        decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
+        decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize,
+                        token_cache);
+        decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize,
+                        token_cache);
+        decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize,
+                        token_cache);
+        decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize,
+                        token_cache);
         break;
       default:
         assert(!"Invalid partition type");
@@ -791,7 +803,8 @@
     vp9_zero(xd->left_seg_context);
     for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
          mi_col += MI_BLOCK_SIZE)
-      decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64);
+      decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64,
+                      pbi->token_cache);
 
     if (pbi->do_loopfilter_inline) {
       const int lf_start = mi_row - MI_BLOCK_SIZE;
@@ -935,7 +948,7 @@
 }
 
 static int tile_worker_hook(void *arg1, void *arg2) {
-  TileWorkerData *const tile_data = (TileWorkerData*)arg1;
+  TileWorkerData *tile_data = (TileWorkerData*)arg1;
   const TileInfo *const tile = (TileInfo*)arg2;
   int mi_row, mi_col;
 
@@ -944,9 +957,11 @@
     vp9_zero(tile_data->xd.left_context);
     vp9_zero(tile_data->xd.left_seg_context);
     for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
-         mi_col += MI_BLOCK_SIZE)
+         mi_col += MI_BLOCK_SIZE) {
       decode_modes_sb(tile_data->cm, &tile_data->xd, tile,
-                      mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64);
+                      mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64,
+                      tile_data->token_cache);
+    }
   }
   return !tile_data->xd.corrupted;
 }
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 6ecce28..84a75e4 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -70,7 +70,6 @@
                      DCT_EOB_MODEL_TOKEN : TWO_TOKEN) :  \
                     token];                              \
     }                                                    \
-    token_cache[scan[c]] = vp9_pt_energy_class[token];   \
   } while (0)
 
 #define WRITE_COEF_CONTINUE(val, token)                  \
@@ -78,20 +77,21 @@
     qcoeff_ptr[scan[c]] = vp9_read_and_apply_sign(r, val) * \
                             dq[c > 0] / (1 + (tx_size == TX_32X32)); \
     INCREMENT_COUNT(token);                              \
+    token_cache[scan[c]] = vp9_pt_energy_class[token];   \
     c++;                                                 \
     continue;                                            \
   }
 
-#define ADJUST_COEF(prob, bits_count)  \
-  do {                                 \
-    if (vp9_read(r, prob))             \
-      val += 1 << bits_count;          \
+#define ADJUST_COEF(prob, bits_count)                   \
+  do {                                                  \
+    val += (vp9_read(r, prob) << bits_count);           \
   } while (0);
 
 static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
                         vp9_reader *r, int block_idx,
                         PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr,
-                        TX_SIZE tx_size, const int16_t *dq, int pt) {
+                        TX_SIZE tx_size, const int16_t *dq, int pt,
+                        uint8_t *token_cache) {
   const FRAME_CONTEXT *const fc = &cm->fc;
   FRAME_COUNTS *const counts = &cm->counts;
   const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
@@ -104,7 +104,6 @@
   vp9_coeff_count_model *coef_counts = counts->coef[tx_size];
   const int16_t *scan, *nb;
   const uint8_t *const band_translate = get_band_translate(tx_size);
-  uint8_t token_cache[1024];
   get_scan(xd, tx_size, type, block_idx, &scan, &nb);
 
   while (1) {
@@ -131,6 +130,7 @@
 
     if (!vp9_read(r, prob[ZERO_CONTEXT_NODE])) {
       INCREMENT_COUNT(ZERO_TOKEN);
+      token_cache[scan[c]] = vp9_pt_energy_class[ZERO_TOKEN];
       ++c;
       goto SKIP_START;
     }
@@ -212,7 +212,8 @@
 
 int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
                             int plane, int block, BLOCK_SIZE plane_bsize,
-                            TX_SIZE tx_size, vp9_reader *r) {
+                            TX_SIZE tx_size, vp9_reader *r,
+                            uint8_t *token_cache) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id,
                                  tx_size);
@@ -223,7 +224,7 @@
 
   eob = decode_coefs(cm, xd, r, block,
                      pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block),
-                     tx_size, pd->dequant, pt);
+                     tx_size, pd->dequant, pt, token_cache);
 
   set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff);
 
diff --git a/vp9/decoder/vp9_detokenize.h b/vp9/decoder/vp9_detokenize.h
index 94dd8e4..04939ea 100644
--- a/vp9/decoder/vp9_detokenize.h
+++ b/vp9/decoder/vp9_detokenize.h
@@ -17,6 +17,7 @@
 
 int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
                             int plane, int block, BLOCK_SIZE plane_bsize,
-                            TX_SIZE tx_size, vp9_reader *r);
+                            TX_SIZE tx_size, vp9_reader *r,
+                            uint8_t *token_cache);
 
 #endif  // VP9_DECODER_VP9_DETOKENIZE_H_
diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h
index 83ea967..7c4c9db 100644
--- a/vp9/decoder/vp9_onyxd_int.h
+++ b/vp9/decoder/vp9_onyxd_int.h
@@ -49,6 +49,8 @@
 
   ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
   PARTITION_CONTEXT *above_seg_context;
+
+  DECLARE_ALIGNED(16, unsigned char, token_cache[1024]);
 } VP9D_COMP;
 
 #endif  // VP9_DECODER_VP9_ONYXD_INT_H_
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c83954e..c3dbc86 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2169,17 +2169,14 @@
       cpi->ni_av_qi = tmp_q;
       cpi->avg_q = vp9_convert_qindex_to_q(tmp_q);
 
-#ifndef ONE_SHOT_Q_ESTIMATE
       // Limit the maxq value returned subsequently.
       // This increases the risk of overspend or underspend if the initial
       // estimate for the clip is bad, but helps prevent excessive
       // variation in Q, especially near the end of a clip
       // where for example a small overspend may cause Q to crash
       adjust_maxq_qrange(cpi);
-#endif
     }
 
-#ifndef ONE_SHOT_Q_ESTIMATE
     // The last few frames of a clip almost always have to few or too many
     // bits and for the sake of over exact rate control we dont want to make
     // radical adjustments to the allowed quantizer range just to use up a
@@ -2202,7 +2199,6 @@
       cpi->active_worst_quality =
           adjust_active_maxq(cpi->active_worst_quality, tmp_q);
     }
-#endif
   }
   vp9_zero(this_frame);
   if (EOF == input_stats(cpi, &this_frame))
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 0b05cf2..1d3170a 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1180,7 +1180,6 @@
   int i;
 
   cpi->oxcf = *oxcf;
-  cpi->goldfreq = 7;
 
   cm->version = oxcf->version;
 
@@ -2851,19 +2850,11 @@
     if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
       cpi->active_best_quality = cpi->cq_target_quality;
     } else {
-#ifdef ONE_SHOT_Q_ESTIMATE
-#ifdef STRICT_ONE_SHOT_Q
-      cpi->active_best_quality = q;
-#else
-      cpi->active_best_quality = inter_minq[q];
-#endif
-#else
       cpi->active_best_quality = inter_minq[q];
       // 1-pass: for now, use the average Q for the active_best, if its lower
       // than active_worst.
       if (cpi->pass == 0 && (cpi->avg_frame_qindex < q))
         cpi->active_best_quality = inter_minq[cpi->avg_frame_qindex];
-#endif
 
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 44f1e26..9429c7f 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -29,11 +29,6 @@
 #include "vp9/common/vp9_findnearmv.h"
 #include "vp9/encoder/vp9_lookahead.h"
 
-// Experimental rate control switches
-#if CONFIG_ONESHOTQ
-#define ONE_SHOT_Q_ESTIMATE 0
-#define STRICT_ONE_SHOT_Q 0
-#endif
 #define DISABLE_RC_LONG_TERM_MEM 0
 
 // #define MODE_TEST_HIT_STATS
@@ -506,14 +501,9 @@
   int decimation_count;
 
   // for real time encoding
-  int avg_encode_time;              // microsecond
-  int avg_pick_mode_time;            // microsecond
   int speed;
-  unsigned int cpu_freq;           // Mhz
   int compressor_speed;
 
-  int interquantizer;
-  int goldfreq;
   int auto_worst_q;
   int cpu_used;
   int pass;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 05928e0..993919e 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1092,7 +1092,7 @@
         else
           x->fwd_txm4x4(src_diff, coeff, 8);
 
-        vp9_regular_quantize_b_4x4(x, 16, block, scan, get_iscan_4x4(tx_type));
+        vp9_regular_quantize_b_4x4(x, 4, block, scan, get_iscan_4x4(tx_type));
 
         ratey += cost_coeffs(x, 0, block,
                              tempa + idx, templ + idy, TX_4X4, scan, nb);
@@ -1559,7 +1559,7 @@
       coeff = BLOCK_OFFSET(p->coeff, k);
       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                     coeff, 8);
-      vp9_regular_quantize_b_4x4(x, 16, k, get_scan_4x4(DCT_DCT),
+      vp9_regular_quantize_b_4x4(x, 4, k, get_scan_4x4(DCT_DCT),
                                  get_iscan_4x4(DCT_DCT));
       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                         16, &ssz);
@@ -1871,12 +1871,14 @@
           mi_buf_restore(x, orig_src, orig_pre);
         }
 
-        if (has_second_rf && this_mode == NEWMV &&
-            mbmi->interp_filter == EIGHTTAP) {
+        if (has_second_rf) {
           if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
               seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
             continue;
+        }
 
+        if (has_second_rf && this_mode == NEWMV &&
+            mbmi->interp_filter == EIGHTTAP) {
           // adjust src pointers
           mi_buf_shift(x, i);
           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
@@ -2660,6 +2662,12 @@
   int orig_dst_stride[MAX_MB_PLANE];
   int rs = 0;
 
+  if (is_comp_pred) {
+    if (frame_mv[refs[0]].as_int == INVALID_MV ||
+        frame_mv[refs[1]].as_int == INVALID_MV)
+      return INT64_MAX;
+  }
+
   if (this_mode == NEWMV) {
     int rate_mv;
     if (is_comp_pred) {
@@ -2678,9 +2686,6 @@
                                    &mbmi->ref_mvs[refs[1]][0].as_mv,
                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
       }
-      if (frame_mv[refs[0]].as_int == INVALID_MV ||
-          frame_mv[refs[1]].as_int == INVALID_MV)
-        return INT64_MAX;
       *rate2 += rate_mv;
     } else {
       int_mv tmp_mv;
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 550263a..579f7a6 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -21,14 +21,6 @@
 #include "vp9/common/vp9_seg_common.h"
 #include "vp9/common/vp9_entropy.h"
 
-/* Global event counters used for accumulating statistics across several
-   compressions, then generating vp9_context.c = initial stats. */
-
-#ifdef ENTROPY_STATS
-vp9_coeff_accum context_counters[TX_SIZES][BLOCK_TYPES];
-extern vp9_coeff_stats tree_update_hist[TX_SIZES][BLOCK_TYPES];
-#endif  /* ENTROPY_STATS */
-
 static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
 const TOKENVALUE *vp9_dct_value_tokens_ptr;
 static int dct_value_cost[DCT_MAX_VALUE * 2];
@@ -226,149 +218,6 @@
   }
 }
 
-#ifdef ENTROPY_STATS
-void init_context_counters(void) {
-  FILE *f = fopen("context.bin", "rb");
-  if (!f) {
-    vp9_zero(context_counters);
-  } else {
-    fread(context_counters, sizeof(context_counters), 1, f);
-    fclose(f);
-  }
-
-  f = fopen("treeupdate.bin", "rb");
-  if (!f) {
-    vpx_memset(tree_update_hist, 0, sizeof(tree_update_hist));
-  } else {
-    fread(tree_update_hist, sizeof(tree_update_hist), 1, f);
-    fclose(f);
-  }
-}
-
-static void print_counter(FILE *f, vp9_coeff_accum *context_counters,
-                          int block_types, const char *header) {
-  int type, ref, band, pt, t;
-
-  fprintf(f, "static const vp9_coeff_count %s = {\n", header);
-
-#define Comma(X) (X ? "," : "")
-  type = 0;
-  do {
-    ref = 0;
-    fprintf(f, "%s\n  { /* block Type %d */", Comma(type), type);
-    do {
-      fprintf(f, "%s\n    { /* %s */", Comma(type), ref ? "Inter" : "Intra");
-      band = 0;
-      do {
-        fprintf(f, "%s\n      { /* Coeff Band %d */", Comma(band), band);
-        pt = 0;
-        do {
-          fprintf(f, "%s\n        {", Comma(pt));
-
-          t = 0;
-          do {
-            const int64_t x = context_counters[type][ref][band][pt][t];
-            const int y = (int) x;
-
-            assert(x == (int64_t) y);  /* no overflow handling yet */
-            fprintf(f, "%s %d", Comma(t), y);
-          } while (++t < 1 + MAX_ENTROPY_TOKENS);
-          fprintf(f, "}");
-        } while (++pt < PREV_COEF_CONTEXTS);
-        fprintf(f, "\n      }");
-      } while (++band < COEF_BANDS);
-      fprintf(f, "\n    }");
-    } while (++ref < REF_TYPES);
-    fprintf(f, "\n  }");
-  } while (++type < block_types);
-  fprintf(f, "\n};\n");
-}
-
-static void print_probs(FILE *f, vp9_coeff_accum *context_counters,
-                        int block_types, const char *header) {
-  int type, ref, band, pt, t;
-
-  fprintf(f, "static const vp9_coeff_probs %s = {", header);
-
-  type = 0;
-#define Newline(x, spaces) (x ? " " : "\n" spaces)
-  do {
-    fprintf(f, "%s%s{ /* block Type %d */",
-            Comma(type), Newline(type, "  "), type);
-    ref = 0;
-    do {
-      fprintf(f, "%s%s{ /* %s */",
-              Comma(band), Newline(band, "    "), ref ? "Inter" : "Intra");
-      band = 0;
-      do {
-        fprintf(f, "%s%s{ /* Coeff Band %d */",
-                Comma(band), Newline(band, "      "), band);
-        pt = 0;
-        do {
-          unsigned int branch_ct[ENTROPY_NODES][2];
-          unsigned int coef_counts[MAX_ENTROPY_TOKENS + 1];
-          vp9_prob coef_probs[ENTROPY_NODES];
-
-          if (pt >= 3 && band == 0)
-            break;
-          for (t = 0; t < MAX_ENTROPY_TOKENS + 1; ++t)
-            coef_counts[t] = context_counters[type][ref][band][pt][t];
-          vp9_tree_probs_from_distribution(vp9_coef_tree, coef_probs,
-                                           branch_ct, coef_counts, 0);
-          branch_ct[0][1] = coef_counts[MAX_ENTROPY_TOKENS] - branch_ct[0][0];
-          coef_probs[0] = get_binary_prob(branch_ct[0][0], branch_ct[0][1]);
-          fprintf(f, "%s\n      {", Comma(pt));
-
-          t = 0;
-          do {
-            fprintf(f, "%s %3d", Comma(t), coef_probs[t]);
-          } while (++t < ENTROPY_NODES);
-
-          fprintf(f, " }");
-        } while (++pt < PREV_COEF_CONTEXTS);
-        fprintf(f, "\n      }");
-      } while (++band < COEF_BANDS);
-      fprintf(f, "\n    }");
-    } while (++ref < REF_TYPES);
-    fprintf(f, "\n  }");
-  } while (++type < block_types);
-  fprintf(f, "\n};\n");
-}
-
-void print_context_counters() {
-  FILE *f = fopen("vp9_context.c", "w");
-
-  fprintf(f, "#include \"vp9_entropy.h\"\n");
-  fprintf(f, "\n/* *** GENERATED FILE: DO NOT EDIT *** */\n\n");
-
-  /* print counts */
-  print_counter(f, context_counters[TX_4X4], BLOCK_TYPES,
-                "vp9_default_coef_counts_4x4[BLOCK_TYPES]");
-  print_counter(f, context_counters[TX_8X8], BLOCK_TYPES,
-                "vp9_default_coef_counts_8x8[BLOCK_TYPES]");
-  print_counter(f, context_counters[TX_16X16], BLOCK_TYPES,
-                "vp9_default_coef_counts_16x16[BLOCK_TYPES]");
-  print_counter(f, context_counters[TX_32X32], BLOCK_TYPES,
-                "vp9_default_coef_counts_32x32[BLOCK_TYPES]");
-
-  /* print coefficient probabilities */
-  print_probs(f, context_counters[TX_4X4], BLOCK_TYPES,
-              "default_coef_probs_4x4[BLOCK_TYPES]");
-  print_probs(f, context_counters[TX_8X8], BLOCK_TYPES,
-              "default_coef_probs_8x8[BLOCK_TYPES]");
-  print_probs(f, context_counters[TX_16X16], BLOCK_TYPES,
-              "default_coef_probs_16x16[BLOCK_TYPES]");
-  print_probs(f, context_counters[TX_32X32], BLOCK_TYPES,
-              "default_coef_probs_32x32[BLOCK_TYPES]");
-
-  fclose(f);
-
-  f = fopen("context.bin", "wb");
-  fwrite(context_counters, sizeof(context_counters), 1, f);
-  fclose(f);
-}
-#endif
-
 void vp9_tokenize_initialize() {
   fill_value_tokens();
 }
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index b78e100..e24e31b 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -28,9 +28,6 @@
   uint8_t         skip_eob_node;
 } TOKENEXTRA;
 
-typedef int64_t vp9_coeff_accum[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
-                               [MAX_ENTROPY_TOKENS + 1];
-
 int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE bsize);
 int vp9_is_skippable_in_plane(MACROBLOCKD *xd, BLOCK_SIZE bsize,
                               int plane);
@@ -39,13 +36,6 @@
 void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run,
                      BLOCK_SIZE bsize);
 
-#ifdef ENTROPY_STATS
-void init_context_counters();
-void print_context_counters();
-
-extern vp9_coeff_accum context_counters[TX_SIZES][BLOCK_TYPES];
-#endif
-
 extern const int *vp9_dct_value_cost_ptr;
 /* TODO: The Token field should be broken out into a separate char array to
  *  improve cache locality, since it's needed for costing when the rest of the