Update cdf and mode rate per superblock in RDO

Update the CDFs after each superblock is finalized, and re-compute the
symbol costs from them. Affected symbols include prediction parameters,
tx type, and new motion vectors.
BDRate: -0.121% lowres, -0.089% midres

Change-Id: I005ef382762d6e3423f933cca0a0b7b40288e8bf
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 30f14d4..f368456 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -148,6 +148,8 @@
 
   int *nmvjointcost;
   int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
+  int nmvcost_array[NMV_CONTEXTS][2][MV_VALS];
+  int nmvcost_hp_array[NMV_CONTEXTS][2][MV_VALS];
   int *nmvcost[NMV_CONTEXTS][2];
   int *nmvcost_hp[NMV_CONTEXTS][2];
   int **mv_cost_stack[NMV_CONTEXTS];
@@ -193,6 +195,7 @@
 
 #if CONFIG_EXT_INTER
   int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
+  int compound_type_cost[BLOCK_SIZES_ALL][COMPOUND_TYPES];
 #if CONFIG_COMPOUND_SINGLEREF
   int inter_singleref_comp_mode_cost[INTER_MODE_CONTEXTS]
                                     [INTER_SINGLEREF_COMP_MODES];
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e4df8ab..2ff56a6 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -477,6 +477,9 @@
          has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
       const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
       ++counts->switchable_interp[ctx][mbmi->interp_filter[dir]];
+      update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx],
+                 av1_switchable_interp_ind[mbmi->interp_filter[dir]],
+                 SWITCHABLE_FILTERS);
     }
   }
 }
@@ -1550,6 +1553,7 @@
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
   const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   const BLOCK_SIZE bsize = mbmi->sb_type;
+  FRAME_CONTEXT *fc = xd->tile_ctx;
 
 #if CONFIG_DELTA_Q
   // delta quant applies to both intra and inter
@@ -1594,6 +1598,10 @@
       if (!supertx_enabled)
 #endif
         counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
+#if CONFIG_NEW_MULTISYMBOL
+      update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
+                 inter_block, 2);
+#endif
       // If the segment reference feature is enabled we have only a single
       // reference frame allowed for the segment so exclude it from
       // the reference frame counts used to work out probabilities.
@@ -1611,12 +1619,21 @@
             // This flag is also updated for 4x4 blocks
             rdc->single_ref_used_flag = 1;
 #if !SUB8X8_COMP_REF
-          if (mbmi->sb_type != BLOCK_4X4)
+          if (mbmi->sb_type != BLOCK_4X4) {
             counts->comp_inter[av1_get_reference_mode_context(cm, xd)]
                               [has_second_ref(mbmi)]++;
+#if CONFIG_NEW_MULTISYMBOL
+            update_cdf(av1_get_reference_mode_cdf(cm, xd), has_second_ref(mbmi),
+                       2);
+#endif
+          }
 #else
           counts->comp_inter[av1_get_reference_mode_context(cm, xd)]
                             [has_second_ref(mbmi)]++;
+#if CONFIG_NEW_MULTISYMBOL
+          update_cdf(av1_get_reference_mode_cdf(cm, xd), has_second_ref(mbmi),
+                     2);
+#endif
 #endif
         }
 
@@ -1725,11 +1742,24 @@
           const int bsize_group = size_group_lookup[bsize];
           if (mbmi->ref_frame[1] == INTRA_FRAME) {
             counts->interintra[bsize_group][1]++;
+#if CONFIG_NEW_MULTISYMBOL
+            update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
+#endif
             counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
-            if (is_interintra_wedge_used(bsize))
+            update_cdf(fc->interintra_mode_cdf[bsize_group],
+                       mbmi->interintra_mode, INTERINTRA_MODES);
+            if (is_interintra_wedge_used(bsize)) {
               counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
+#if CONFIG_NEW_MULTISYMBOL
+              update_cdf(fc->wedge_interintra_cdf[bsize],
+                         mbmi->use_wedge_interintra, 2);
+#endif
+            }
           } else {
             counts->interintra[bsize_group][0]++;
+#if CONFIG_NEW_MULTISYMBOL
+            update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
+#endif
           }
         }
 #endif  // CONFIG_INTERINTRA
@@ -1769,17 +1799,21 @@
           {
             if (motion_allowed == WARPED_CAUSAL) {
               counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++;
-              update_cdf(xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
-                         mbmi->motion_mode, MOTION_MODES);
+              update_cdf(fc->motion_mode_cdf[mbmi->sb_type], mbmi->motion_mode,
+                         MOTION_MODES);
             } else if (motion_allowed == OBMC_CAUSAL) {
               counts->obmc[mbmi->sb_type][mbmi->motion_mode == OBMC_CAUSAL]++;
+#if CONFIG_NEW_MULTISYMBOL
+              update_cdf(fc->obmc_cdf[mbmi->sb_type],
+                         mbmi->motion_mode == OBMC_CAUSAL, 2);
+#endif
             }
           }
 #else
         if (motion_allowed > SIMPLE_TRANSLATION) {
           counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++;
-          update_cdf(xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
-                     mbmi->motion_mode, MOTION_MODES);
+          update_cdf(fc->motion_mode_cdf[mbmi->sb_type], mbmi->motion_mode,
+                     MOTION_MODES);
         }
 #endif  // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
 
@@ -1812,7 +1846,16 @@
             && mbmi->motion_mode == SIMPLE_TRANSLATION
 #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
             ) {
-          counts->compound_interinter[bsize][mbmi->interinter_compound_type]++;
+#if CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+          if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
+#endif
+            counts
+                ->compound_interinter[bsize][mbmi->interinter_compound_type]++;
+            update_cdf(fc->compound_type_cdf[bsize],
+                       mbmi->interinter_compound_type, COMPOUND_TYPES);
+#if CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
+          }
+#endif
         }
 #endif  // CONFIG_EXT_INTER
       }
@@ -1826,6 +1869,8 @@
       if (has_second_ref(mbmi)) {
         mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
         ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
+        update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
+                   INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
 #if CONFIG_COMPOUND_SINGLEREF
       } else if (is_inter_singleref_comp_mode(mode)) {
         mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]];
@@ -4461,7 +4506,6 @@
 
   // Initialize the left context for the new SB row
   av1_zero_left_context(xd);
-  av1_fill_mode_rates(cm, x, xd->tile_ctx);
 
 #if CONFIG_DELTA_Q
   // Reset delta for every tile
@@ -4498,6 +4542,7 @@
     av1_fill_token_costs_from_cdf(x->token_tail_costs,
                                   x->e_mbd.tile_ctx->coef_tail_cdfs);
 #endif
+    av1_fill_mode_rates(cm, x, xd->tile_ctx);
 
     if (sf->adaptive_pred_interp_filter) {
       for (i = 0; i < leaf_nodes; ++i)
@@ -5661,21 +5706,18 @@
                             const MODE_INFO *mi, const MODE_INFO *above_mi,
                             const MODE_INFO *left_mi, const int intraonly,
                             const int mi_row, const int mi_col) {
+  FRAME_CONTEXT *fc = xd->tile_ctx;
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
-#if CONFIG_ENTROPY_STATS
   const PREDICTION_MODE y_mode = mbmi->mode;
   const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
-#else   // CONFIG_ENTROPY_STATS
   (void)counts;
   (void)above_mi;
   (void)left_mi;
   (void)intraonly;
-#endif  // CONFIG_ENTROPY_STATS
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const int unify_bsize = CONFIG_CB4X4;
 
   if (bsize < BLOCK_8X8 && !unify_bsize) {
-#if CONFIG_ENTROPY_STATS
     int idx, idy;
     const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
     const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
@@ -5684,24 +5726,36 @@
         const int bidx = idy * 2 + idx;
         const PREDICTION_MODE bmode = mi->bmi[bidx].as_mode;
         if (intraonly) {
+#if CONFIG_ENTROPY_STATS
           const PREDICTION_MODE a = av1_above_block_mode(mi, above_mi, bidx);
           const PREDICTION_MODE l = av1_left_block_mode(mi, left_mi, bidx);
           ++counts->kf_y_mode[a][l][bmode];
+#endif  // CONFIG_ENTROPY_STATS
+          update_cdf(get_y_mode_cdf(fc, mi, above_mi, left_mi, bidx),
+                     av1_intra_mode_ind[bmode], INTRA_MODES);
         } else {
+#if CONFIG_ENTROPY_STATS
           ++counts->y_mode[0][bmode];
+#endif  // CONFIG_ENTROPY_STATS
+          update_cdf(fc->y_mode_cdf[0], av1_intra_mode_ind[bmode], INTRA_MODES);
         }
       }
-#endif  // CONFIG_ENTROPY_STATS
   } else {
-#if CONFIG_ENTROPY_STATS
     if (intraonly) {
+#if CONFIG_ENTROPY_STATS
       const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, 0);
       const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, 0);
       ++counts->kf_y_mode[above][left][y_mode];
-    } else {
-      ++counts->y_mode[size_group_lookup[bsize]][y_mode];
-    }
 #endif  // CONFIG_ENTROPY_STATS
+      update_cdf(get_y_mode_cdf(fc, mi, above_mi, left_mi, 0),
+                 av1_intra_mode_ind[y_mode], INTRA_MODES);
+    } else {
+#if CONFIG_ENTROPY_STATS
+      ++counts->y_mode[size_group_lookup[bsize]][y_mode];
+#endif  // CONFIG_ENTROPY_STATS
+      update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]],
+                 av1_intra_mode_ind[y_mode], INTRA_MODES);
+    }
 #if CONFIG_FILTER_INTRA
     if (mbmi->mode == DC_PRED && mbmi->palette_mode_info.palette_size[0] == 0) {
       const int use_filter_intra_mode =
@@ -5743,6 +5797,7 @@
 #if CONFIG_ENTROPY_STATS
   ++counts->uv_mode[y_mode][uv_mode];
 #endif  // CONFIG_ENTROPY_STATS
+  update_cdf(fc->uv_mode_cdf[y_mode], av1_intra_mode_ind[uv_mode], INTRA_MODES);
 }
 
 #if CONFIG_VAR_TX
@@ -5889,6 +5944,7 @@
                               FRAME_COUNTS *counts) {
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   int is_inter = is_inter_block(mbmi);
+  FRAME_CONTEXT *fc = xd->tile_ctx;
 
 #if !CONFIG_TXK_SEL
   TX_TYPE tx_type = mbmi->tx_type;
@@ -5909,9 +5965,14 @@
     if (eset > 0) {
       if (is_inter) {
         ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]][tx_type];
+        update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]],
+                   av1_ext_tx_inter_ind[eset][tx_type], ext_tx_cnt_inter[eset]);
       } else {
         ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][mbmi->mode]
                               [tx_type];
+        update_cdf(
+            fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][mbmi->mode],
+            av1_ext_tx_intra_ind[eset][tx_type], ext_tx_cnt_intra[eset]);
       }
     }
   }
@@ -5924,9 +5985,15 @@
       !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
     if (is_inter) {
       ++counts->inter_ext_tx[tx_size][tx_type];
+      update_cdf(fc->inter_ext_tx_cdf[tx_size], av1_ext_tx_ind[tx_type],
+                 TX_TYPES);
     } else {
       ++counts->intra_ext_tx[tx_size][intra_mode_to_tx_type_context[mbmi->mode]]
                             [tx_type];
+      update_cdf(
+          fc->intra_ext_tx_cdf[tx_size]
+                              [intra_mode_to_tx_type_context[mbmi->mode]],
+          av1_ext_tx_ind[tx_type], TX_TYPES);
     }
   }
 #endif  // CONFIG_EXT_TX
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index fd61fe6..52a3c2c 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c
@@ -97,22 +97,31 @@
 
   sign_cost[0] = av1_cost_zero(mvcomp->sign);
   sign_cost[1] = av1_cost_one(mvcomp->sign);
-  av1_cost_tokens(class_cost, mvcomp->classes, av1_mv_class_tree);
+  av1_cost_tokens_from_cdf(class_cost, mvcomp->class_cdf, NULL);
+#if CONFIG_NEW_MULTISYMBOL
+  av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL);
+#else
   av1_cost_tokens(class0_cost, mvcomp->class0, av1_mv_class0_tree);
+#endif
   for (i = 0; i < MV_OFFSET_BITS; ++i) {
     bits_cost[i][0] = av1_cost_zero(mvcomp->bits[i]);
     bits_cost[i][1] = av1_cost_one(mvcomp->bits[i]);
   }
 
   for (i = 0; i < CLASS0_SIZE; ++i)
-    av1_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], av1_mv_fp_tree);
-  av1_cost_tokens(fp_cost, mvcomp->fp, av1_mv_fp_tree);
+    av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i], NULL);
+  av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
 
   if (precision > MV_SUBPEL_LOW_PRECISION) {
+#if CONFIG_NEW_MULTISYMBOL
+    av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
+    av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
+#else
     class0_hp_cost[0] = av1_cost_zero(mvcomp->class0_hp);
     class0_hp_cost[1] = av1_cost_one(mvcomp->class0_hp);
     hp_cost[0] = av1_cost_zero(mvcomp->hp);
     hp_cost[1] = av1_cost_one(mvcomp->hp);
+#endif
   }
   mvcost[0] = 0;
   for (v = 1; v <= MV_MAX; ++v) {
@@ -217,15 +226,64 @@
 void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
                               const nmv_context *ctx,
                               MvSubpelPrecision precision) {
-  av1_cost_tokens(mvjoint, ctx->joints, av1_mv_joint_tree);
+  av1_cost_tokens_from_cdf(mvjoint, ctx->joint_cdf, NULL);
   build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
   build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
 }
 
+// Adapts the class and fractional-pel CDFs of |mvcomp| (plus the high
+// precision CDFs under CONFIG_NEW_MULTISYMBOL) to the nonzero component |v|.
+static void inc_mv_component_cdf(int v, nmv_component *mvcomp,
+                                 MvSubpelPrecision precision) {
+  assert(v != 0);
+  (void)precision; /* only read when CONFIG_NEW_MULTISYMBOL is enabled */
+  const int mag_minus1 = (v < 0 ? -v : v) - 1; /* magnitude - 1 */
+
+  int offset;
+  const int mv_class = av1_get_mv_class(mag_minus1, &offset);
+  update_cdf(mvcomp->class_cdf, mv_class, MV_CLASSES);
+  const int int_part = offset >> 3;   /* int mv data */
+  const int frac = (offset >> 1) & 3; /* fractional pel mv data */
+#if CONFIG_NEW_MULTISYMBOL
+  const int hp_bit = offset & 1; /* high precision mv data */
+  const int use_hp = precision > MV_SUBPEL_LOW_PRECISION;
+#endif
+
+  if (mv_class == MV_CLASS_0) {
+    update_cdf(mvcomp->class0_fp_cdf[int_part], frac, MV_FP_SIZE);
+#if CONFIG_NEW_MULTISYMBOL
+    if (use_hp) update_cdf(mvcomp->class0_hp_cdf, hp_bit, 2);
+#endif
+  } else {
+    update_cdf(mvcomp->fp_cdf, frac, MV_FP_SIZE);
+#if CONFIG_NEW_MULTISYMBOL
+    if (use_hp) update_cdf(mvcomp->hp_cdf, hp_bit, 2);
+#endif
+  }
+}
+
+// Adapts the joint CDF of |nmvc| to the joint type of |mv|, then adapts the
+// component CDFs: comps[0] with the row when the joint type has a vertical
+// part, comps[1] with the column when it has a horizontal part.
+static void inc_mv_cdf(const MV *mv, nmv_context *const nmvc,
+                       MvSubpelPrecision precision) {
+  const MV_JOINT_TYPE joint = av1_get_mv_joint(mv);
+  update_cdf(nmvc->joint_cdf, joint, MV_JOINTS);
+
+  // Row of the motion vector.
+  if (mv_joint_vertical(joint))
+    inc_mv_component_cdf(mv->row, &nmvc->comps[0], precision);
+
+  // Column of the motion vector.
+  if (mv_joint_horizontal(joint))
+    inc_mv_component_cdf(mv->col, &nmvc->comps[1], precision);
+
+}
+
 #if CONFIG_EXT_INTER
 static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
                     const int_mv mvs[2], const int_mv pred_mvs[2],
-                    nmv_context_counts *nmv_counts) {
+                    nmv_context_counts *nmv_counts, FRAME_CONTEXT *fc) {
   int i;
   PREDICTION_MODE mode = mbmi->mode;
 
@@ -241,6 +299,7 @@
       nmv_context_counts *counts = &nmv_counts[nmv_ctx];
       (void)pred_mvs;
       av1_inc_mv(&diff, counts, 1);
+      inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
     }
   } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv;
@@ -252,6 +311,7 @@
                     mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
     av1_inc_mv(&diff, counts, 1);
+    inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
   } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
     const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
     const MV diff = { mvs[0].as_mv.row - ref->row,
@@ -262,6 +322,7 @@
                     mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
     av1_inc_mv(&diff, counts, 1);
+    inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
 #if CONFIG_COMPOUND_SINGLEREF
   } else {
     assert(  // mode == SR_NEAREST_NEWMV ||
@@ -278,17 +339,19 @@
       diff.row = mvs[0].as_mv.row - ref->row;
       diff.col = mvs[0].as_mv.col - ref->col;
       av1_inc_mv(&diff, counts, 1);
+      inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
     }
     diff.row = mvs[1].as_mv.row - ref->row;
     diff.col = mvs[1].as_mv.col - ref->col;
     av1_inc_mv(&diff, counts, 1);
+    inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
 #endif  // CONFIG_COMPOUND_SINGLEREF
   }
 }
 
 static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2],
                            const MB_MODE_INFO_EXT *mbmi_ext,
-                           nmv_context_counts *nmv_counts) {
+                           nmv_context_counts *nmv_counts, FRAME_CONTEXT *fc) {
   int i;
   PREDICTION_MODE mode = mi->bmi[block].as_mode;
   const MB_MODE_INFO *mbmi = &mi->mbmi;
@@ -304,6 +367,7 @@
                       mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx);
       nmv_context_counts *counts = &nmv_counts[nmv_ctx];
       av1_inc_mv(&diff, counts, 1);
+      inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
     }
   } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
     const MV *ref = &mi->bmi[block].ref_mv[1].as_mv;
@@ -315,6 +379,7 @@
                     mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
     av1_inc_mv(&diff, counts, 1);
+    inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
   } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
     const MV *ref = &mi->bmi[block].ref_mv[0].as_mv;
     const MV diff = { mvs[0].as_mv.row - ref->row,
@@ -325,12 +390,13 @@
                     mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx);
     nmv_context_counts *counts = &nmv_counts[nmv_ctx];
     av1_inc_mv(&diff, counts, 1);
+    inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
   }
 }
 #else   // !CONFIG_EXT_INTER
 static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext,
                     const int_mv mvs[2], const int_mv pred_mvs[2],
-                    nmv_context_counts *nmv_counts) {
+                    nmv_context_counts *nmv_counts, FRAME_CONTEXT *fc) {
   int i;
 
   for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
@@ -343,6 +409,7 @@
     const MV diff = { mvs[i].as_mv.row - ref->row,
                       mvs[i].as_mv.col - ref->col };
     av1_inc_mv(&diff, counts, 1);
+    inc_mv_cdf(&diff, &fc->nmvc[nmv_ctx], 1);
   }
 }
 #endif  // CONFIG_EXT_INTER
@@ -369,11 +436,12 @@
 
 #if CONFIG_EXT_INTER
         if (have_newmv_in_inter_mode(mi->bmi[i].as_mode))
-          inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, mbmi_ext, td->counts->mv);
+          inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, mbmi_ext, td->counts->mv,
+                         xd->tile_ctx);
 #else
         if (mi->bmi[i].as_mode == NEWMV)
           inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, mi->bmi[i].pred_mv,
-                  td->counts->mv);
+                  td->counts->mv, xd->tile_ctx);
 #endif  // CONFIG_EXT_INTER
       }
     }
@@ -383,6 +451,7 @@
 #else
     if (mbmi->mode == NEWMV)
 #endif  // CONFIG_EXT_INTER
-      inc_mvs(mbmi, mbmi_ext, mbmi->mv, mbmi->pred_mv, td->counts->mv);
+      inc_mvs(mbmi, mbmi_ext, mbmi->mv, mbmi->pred_mv, td->counts->mv,
+              xd->tile_ctx);
   }
 }
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 972c9bf..2d3a741 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -249,23 +249,6 @@
   }
 }
 
-void av1_set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv) {
-  MACROBLOCK *const mb = &cpi->td.mb;
-  cpi->common.allow_high_precision_mv = allow_high_precision_mv;
-
-  if (cpi->common.allow_high_precision_mv) {
-    int i;
-    for (i = 0; i < NMV_CONTEXTS; ++i) {
-      mb->mv_cost_stack[i] = mb->nmvcost_hp[i];
-    }
-  } else {
-    int i;
-    for (i = 0; i < NMV_CONTEXTS; ++i) {
-      mb->mv_cost_stack[i] = mb->nmvcost[i];
-    }
-  }
-}
-
 static BLOCK_SIZE select_sb_size(const AV1_COMP *const cpi) {
 #if CONFIG_EXT_PARTITION
   if (cpi->oxcf.superblock_size == AOM_SUPERBLOCK_SIZE_64X64)
@@ -540,18 +523,11 @@
 static void save_coding_context(AV1_COMP *cpi) {
   CODING_CONTEXT *const cc = &cpi->coding_context;
   AV1_COMMON *cm = &cpi->common;
-  int i;
 
   // Stores a snapshot of key state variables which can subsequently be
   // restored with a call to av1_restore_coding_context. These functions are
   // intended for use in a re-code loop in av1_compress_frame where the
   // quantizer value is adjusted between loop iterations.
-  for (i = 0; i < NMV_CONTEXTS; ++i) {
-    av1_copy(cc->nmv_vec_cost[i], cpi->td.mb.nmv_vec_cost[i]);
-    av1_copy(cc->nmv_costs, cpi->nmv_costs);
-    av1_copy(cc->nmv_costs_hp, cpi->nmv_costs_hp);
-  }
-
   av1_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
   av1_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
 
@@ -561,16 +537,9 @@
 static void restore_coding_context(AV1_COMP *cpi) {
   CODING_CONTEXT *const cc = &cpi->coding_context;
   AV1_COMMON *cm = &cpi->common;
-  int i;
 
   // Restore key state variables to the snapshot state stored in the
   // previous call to av1_save_coding_context.
-  for (i = 0; i < NMV_CONTEXTS; ++i) {
-    av1_copy(cpi->td.mb.nmv_vec_cost[i], cc->nmv_vec_cost[i]);
-    av1_copy(cpi->nmv_costs, cc->nmv_costs);
-    av1_copy(cpi->nmv_costs_hp, cc->nmv_costs_hp);
-  }
-
   av1_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
   av1_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
 
@@ -2327,7 +2296,7 @@
   set_compound_tools(cm);
 #endif  // CONFIG_EXT_INTER
   av1_reset_segment_features(cm);
-  av1_set_high_precision_mv(cpi, 0);
+  cm->allow_high_precision_mv = 0;
 
   set_rc_buffer_sizes(rc, &cpi->oxcf);
 
@@ -2457,11 +2426,6 @@
 
   realloc_segmentation_maps(cpi);
 
-  for (i = 0; i < NMV_CONTEXTS; ++i) {
-    memset(cpi->nmv_costs, 0, sizeof(cpi->nmv_costs));
-    memset(cpi->nmv_costs_hp, 0, sizeof(cpi->nmv_costs_hp));
-  }
-
   for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
        i++) {
     CHECK_MEM_ERROR(
@@ -2524,13 +2488,6 @@
 
   cpi->first_time_stamp_ever = INT64_MAX;
 
-  for (i = 0; i < NMV_CONTEXTS; ++i) {
-    cpi->td.mb.nmvcost[i][0] = &cpi->nmv_costs[i][0][MV_MAX];
-    cpi->td.mb.nmvcost[i][1] = &cpi->nmv_costs[i][1][MV_MAX];
-    cpi->td.mb.nmvcost_hp[i][0] = &cpi->nmv_costs_hp[i][0][MV_MAX];
-    cpi->td.mb.nmvcost_hp[i][1] = &cpi->nmv_costs_hp[i][1][MV_MAX];
-  }
-
 #ifdef OUTPUT_YUV_SKINMAP
   yuv_skinmap_file = fopen("skinmap.yuv", "ab");
 #endif
@@ -3918,9 +3875,8 @@
   *q = av1_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
 #endif
 
-  if (!frame_is_intra_only(cm)) {
-    av1_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
-  }
+  if (!frame_is_intra_only(cm))
+    cm->allow_high_precision_mv = (*q) < HIGH_PRECISION_MV_QTHRESH;
 
   // Configure experimental use of segmentation for enhanced coding of
   // static regions if indicated.
@@ -5733,7 +5689,7 @@
 
   aom_usec_timer_start(&cmptimer);
 
-  av1_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
+  cm->allow_high_precision_mv = ALTREF_HIGH_PRECISION_MV;
 
   // Is multi-arf enabled.
   // Note that at the moment multi_arf is only configured for 2 pass VBR
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 8a3d9d3..e081012 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -58,10 +58,6 @@
 #endif  // CONFIG_SPEED_REFS
 
 typedef struct {
-  int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS];
-  int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
-  int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
-
   // 0 = Intra, Last, GF, ARF
   signed char last_ref_lf_deltas[TOTAL_REFS_PER_FRAME];
   // 0 = ZERO_MV, MV
@@ -456,9 +452,6 @@
   int gmparams_cost[TOTAL_REFS_PER_FRAME];
 #endif  // CONFIG_GLOBAL_MOTION
 
-  int nmv_costs[NMV_CONTEXTS][2][MV_VALS];
-  int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS];
-
   int64_t last_time_stamp_seen;
   int64_t last_end_time_stamp_seen;
   int64_t first_time_stamp_ever;
@@ -748,7 +741,6 @@
 
 void av1_update_reference_frames(AV1_COMP *cpi);
 
-void av1_set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv);
 #if CONFIG_TEMPMV_SIGNALING
 void av1_set_temporal_mv_prediction(AV1_COMP *cpi, int allow_tempmv_prediction);
 #endif
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 8a2a63c..6dfeb68 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -61,6 +61,29 @@
   4,  4,  8,  8, 16, 16
 };
 
+// Rebuilds the MV joint/component cost tables of every NMV context from |fc|.
+void av1_fill_nmv_costs(AV1_COMMON *const cm, MACROBLOCK *x,
+                        FRAME_CONTEXT *fc) {
+  const int use_hp = cm->allow_high_precision_mv;
+  // Clear both tables once, up front: clearing them inside the loop would
+  // wipe the costs already built for the lower-numbered contexts.
+  memset(x->nmvcost_array, 0, sizeof(x->nmvcost_array));
+  memset(x->nmvcost_hp_array, 0, sizeof(x->nmvcost_hp_array));
+  for (int i = 0; i < NMV_CONTEXTS; ++i) {
+    // Cost pointers are offset by MV_MAX into each per-component table.
+    x->nmvcost[i][0] = &x->nmvcost_array[i][0][MV_MAX];
+    x->nmvcost[i][1] = &x->nmvcost_array[i][1][MV_MAX];
+    x->nmvcost_hp[i][0] = &x->nmvcost_hp_array[i][0][MV_MAX];
+    x->nmvcost_hp[i][1] = &x->nmvcost_hp_array[i][1][MV_MAX];
+    x->mv_cost_stack[i] = use_hp ? x->nmvcost_hp[i] : x->nmvcost[i];
+    av1_build_nmv_cost_table(x->nmv_vec_cost[i], x->mv_cost_stack[i],
+                             &fc->nmvc[i], use_hp);
+  }
+  // Start out with the tables of context 0 selected.
+  x->mvcost = x->mv_cost_stack[0];
+  x->nmvjointcost = x->nmv_vec_cost[0];
+}
+
 void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
                          FRAME_CONTEXT *fc) {
   int i, j;
@@ -200,8 +223,15 @@
   av1_cost_tokens(x->switchable_restore_cost, fc->switchable_restore_prob,
                   av1_switchable_restore_tree);
 #endif  // CONFIG_LOOP_RESTORATION
+#if CONFIG_INTRABC
+  av1_fill_nmv_costs(cm, x, fc);
+#endif
 
   if (!frame_is_intra_only(cm)) {
+#if !CONFIG_INTRABC
+    av1_fill_nmv_costs(cm, x, fc);
+#endif
+
     for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) {
 #if CONFIG_NEW_MULTISYMBOL
       av1_cost_tokens_from_cdf(x->newmv_mode_cost[i], fc->newmv_cdf[i], NULL);
@@ -241,6 +271,9 @@
     for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
       av1_cost_tokens_from_cdf(x->inter_compound_mode_cost[i],
                                fc->inter_compound_mode_cdf[i], NULL);
+    for (i = 0; i < BLOCK_SIZES_ALL; ++i)
+      av1_cost_tokens_from_cdf(x->compound_type_cost[i],
+                               fc->compound_type_cdf[i], NULL);
 #if CONFIG_COMPOUND_SINGLEREF
     for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
       av1_cost_tokens_from_cdf(x->inter_singleref_comp_mode_cost[i],
@@ -536,7 +569,6 @@
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
   RD_OPT *const rd = &cpi->rd;
-  int nmv_ctx;
 
   aom_clear_system_state();
 
@@ -546,15 +578,7 @@
 
   set_block_thresholds(cm, rd);
 
-  for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) {
-    av1_build_nmv_cost_table(
-        x->nmv_vec_cost[nmv_ctx],
-        cm->allow_high_precision_mv ? x->nmvcost_hp[nmv_ctx]
-                                    : x->nmvcost[nmv_ctx],
-        &cm->fc->nmvc[nmv_ctx], cm->allow_high_precision_mv);
-  }
-  x->mvcost = x->mv_cost_stack[0];
-  x->nmvjointcost = x->nmv_vec_cost[0];
+  av1_fill_nmv_costs(cm, x, cm->fc);
 
 #if CONFIG_INTRABC
   if (frame_is_intra_only(cm) && cm->allow_screen_content_tools &&
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 180001a..69bc56c 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9232,7 +9232,6 @@
     int best_tmp_rate_mv = rate_mv;
     int tmp_skip_txfm_sb;
     int64_t tmp_skip_sse_sb;
-    int compound_type_cost[COMPOUND_TYPES];
     DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
     DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
     uint8_t *preds0[1] = { pred0 };
@@ -9244,6 +9243,7 @@
     masked_compound_used = masked_compound_used && cm->allow_masked_compound;
 #endif  // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
     COMPOUND_TYPE cur_type;
+    int best_compmode_interinter_cost = 0;
 
     best_mv[0].as_int = cur_mv[0].as_int;
     best_mv[1].as_int = cur_mv[1].as_int;
@@ -9265,8 +9265,6 @@
 #endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
 
     if (masked_compound_used) {
-      av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
-                      av1_compound_type_tree);
       // get inter predictors to use for masked compound modes
       av1_build_inter_predictors_for_planes_single_buf(
           xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
@@ -9288,7 +9286,7 @@
         else
 #endif  // CONFIG_WEDGE && CONFIG_COMPOUND_SEGMENT
           masked_type_cost +=
-              compound_type_cost[mbmi->interinter_compound_type];
+              x->compound_type_cost[bsize][mbmi->interinter_compound_type];
       }
       rs2 = av1_cost_literal(get_interinter_compound_type_bits(
                 bsize, mbmi->interinter_compound_type)) +
@@ -9342,6 +9340,7 @@
 #endif  // CONFIG_COMPOUND_SEGMENT
         best_compound_data.interinter_compound_type =
             mbmi->interinter_compound_type;
+        best_compmode_interinter_cost = rs2;
         if (have_newmv_in_inter_mode(this_mode)) {
           if (use_masked_motion_search(cur_type)) {
             best_tmp_rate_mv = tmp_rate_mv;
@@ -9386,12 +9385,7 @@
 
     pred_exists = 0;
 
-    compmode_interinter_cost =
-        av1_cost_literal(get_interinter_compound_type_bits(
-            bsize, mbmi->interinter_compound_type)) +
-        (masked_compound_used
-             ? compound_type_cost[mbmi->interinter_compound_type]
-             : 0);
+    compmode_interinter_cost = best_compmode_interinter_cost;
   }
 #endif  // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT