motion_var: compute motion_mode_cost from cdf

Initialize the mode cost using the frame-level cdf.
Also, in the rd selection stage, the cdf is updated per 64x64.
Performance gain: 0.20%.

This is still suboptimal, since in real bitstream packing the cdf is
updated per symbol. Per-symbol update in RDO is a work in progress.

Change-Id: I5062af91d8b00e5bf4c08abd0a7bfb0e5b27a619
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 7b6eb0b..cbe4675 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -149,6 +149,15 @@
   uint8_t *above_pred_buf;
   uint8_t *left_pred_buf;
 #endif  // CONFIG_MOTION_VAR
+#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
+  int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
+#if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+  int motion_mode_cost1[BLOCK_SIZES_ALL][2];
+#endif  // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
+#if CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
+  int ncobmc_mode_cost[ADAPT_OVERLAP_BLOCKS][MAX_NCOBMC_MODES];
+#endif  // CONFIG_MOTION_VAR && CONFIG_NCOBMC_ADAPT_WEIGHT
+#endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
 
 #if CONFIG_PALETTE
   PALETTE_BUFFER *palette_buffer;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index d13eb42..0e8364d 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1759,14 +1759,20 @@
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
           {
-            if (motion_allowed == WARPED_CAUSAL)
+            if (motion_allowed == WARPED_CAUSAL) {
               counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++;
-            else if (motion_allowed == OBMC_CAUSAL)
+              update_cdf(xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
+                         mbmi->motion_mode, MOTION_MODES);
+            } else if (motion_allowed == OBMC_CAUSAL) {
               counts->obmc[mbmi->sb_type][mbmi->motion_mode == OBMC_CAUSAL]++;
+            }
           }
 #else
-        if (motion_allowed > SIMPLE_TRANSLATION)
+        if (motion_allowed > SIMPLE_TRANSLATION) {
           counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++;
+          update_cdf(xd->tile_ctx->motion_mode_cdf[mbmi->sb_type],
+                     mbmi->motion_mode, MOTION_MODES);
+        }
 #endif  // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
 
 #if CONFIG_NCOBMC_ADAPT_WEIGHT
@@ -4633,6 +4639,11 @@
 #endif  // CONFIG_SPEED_REFS
     }
   }
+  // TODO(yuec) Suboptimal fix. Need to implement per-block update
+  for (int i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
+    av1_cost_tokens_from_cdf(x->motion_mode_cost[i],
+                             xd->tile_ctx->motion_mode_cdf[i], NULL);
+  }
 }
 
 static void init_encode_frame_mb_context(AV1_COMP *cpi) {
@@ -4874,6 +4885,10 @@
 
   av1_setup_across_tile_boundary_info(cm, tile_info);
 
+  for (int i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
+    av1_cost_tokens_from_cdf(td->mb.motion_mode_cost[i],
+                             cm->fc->motion_mode_cdf[i], NULL);
+  }
   for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
        mi_row += cm->mib_size) {
     encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 4c59d9d..32f164b 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -464,8 +464,8 @@
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
       for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
-        av1_cost_tokens((int *)cpi->motion_mode_cost[i],
-                        cm->fc->motion_mode_prob[i], av1_motion_mode_tree);
+        av1_cost_tokens_from_cdf(cpi->motion_mode_cost[i],
+                                 cm->fc->motion_mode_cdf[i], NULL);
       }
 #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
       for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index c8e5811..e846ae6 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -8544,7 +8544,7 @@
 #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
       if (last_motion_mode_allowed == WARPED_CAUSAL)
 #endif  // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
-        rd_stats->rate += cpi->motion_mode_cost[bsize][mbmi->motion_mode];
+        rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode];
 #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
       else
         rd_stats->rate += cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
@@ -10948,7 +10948,7 @@
         av1_init_rd_stats(&rd_stats);
 #if CONFIG_DIST_8X8 && CONFIG_CB4X4
         // While av1 master uses rd_stats_y.rate through out the codebase,
-        // which is set when handle_inter_moden is called, the daala-dist code
+        // which is set when handle_inter_mode is called, the daala-dist code
         // in rd_pick_partition() for cb4x4 and sub8x8 blocks need to know
         // .dist_y which comes from rd_stats_y.dist and rd_stats_y.sse.
         // The problem is rd_stats_y.dist and rd_stats_y.sse are sometimes not
@@ -11429,12 +11429,12 @@
 #endif
             mi);
         if (motion_allowed == WARPED_CAUSAL)
-          *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
+          *returnrate_nocoef -= x->motion_mode_cost[bsize][mbmi->motion_mode];
         else if (motion_allowed == OBMC_CAUSAL)
           *returnrate_nocoef -=
               cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
 #else
-        *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
+        *returnrate_nocoef -= x->motion_mode_cost[bsize][mbmi->motion_mode];
 #endif  // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
 #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
 #endif  // CONFIG_SUPERTX