Optimize PC_TREE to store only winner mode info

PC_TREE is modified to store only the winner reference
mode information. The sizes of ref_mv_stack, weight,
mode_context and ref_mv_count in PC_TREE are reduced.

A memory footprint reduction is observed, with similar
encoding time.

Resolution    Tile     Memory reduction
                       Single   Multi
                       Thread   Thread
640x360       2x1      ~16%    ~25% (2 threads)
832x480       2x1      ~13%    ~21% (2 threads)
1280x720      2x2      ~10%    ~25% (4 threads)
1920x1080     4x2      ~5%     ~25% (8 threads)

Memory measuring command:
$ command time -v ./aomenc ...

Change-Id: I5675d289b89b6deabd11274f4210dd81c058b44c
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index 00f7509..ab245d5 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -223,7 +223,7 @@
 void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx,
                            PICK_MODE_CONTEXT *src_ctx) {
   dst_ctx->mic = src_ctx->mic;
-  dst_ctx->mbmi_ext = src_ctx->mbmi_ext;
+  dst_ctx->mbmi_ext_best = src_ctx->mbmi_ext_best;
 
   dst_ctx->num_4x4_blk = src_ctx->num_4x4_blk;
   dst_ctx->skippable = src_ctx->skippable;
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index 24f002a..d5b4e83 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -28,7 +28,7 @@
 // Structure to hold snapshot of coding context during the mode picking process
 typedef struct {
   MB_MODE_INFO mic;
-  MB_MODE_INFO_EXT mbmi_ext;
+  MB_MODE_INFO_EXT_FRAME mbmi_ext_best;
   uint8_t *color_index_map[2];
   uint8_t *blk_skip;
 
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index ee132e6..3e5f966 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -507,6 +507,21 @@
   x->force_skip = 0;
 }
 
+// Copies the ref MV stack, weights, mode context and ref MV count held in
+// mbmi_ext_best into slot ref_frame_type of mbmi_ext, plus all global_mvs.
+static INLINE void copy_mbmi_ext_frame_to_mbmi_ext(
+    MB_MODE_INFO_EXT *mbmi_ext,
+    const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type) {
+  memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack,
+         sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
+  memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight,
+         sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
+  mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context;
+  mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count;
+  memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs,
+         sizeof(mbmi_ext->global_mvs));
+}
+
 static AOM_INLINE void update_state(const AV1_COMP *const cpi, ThreadData *td,
                                     const PICK_MODE_CONTEXT *const ctx,
                                     int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -532,7 +547,8 @@
   assert(mi->sb_type == bsize);
 
   *mi_addr = *mi;
-  *x->mbmi_ext = ctx->mbmi_ext;
+  copy_mbmi_ext_frame_to_mbmi_ext(x->mbmi_ext, &ctx->mbmi_ext_best,
+                                  av1_ref_frame_type(ctx->mic.ref_frame));
 
   memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
 
@@ -687,25 +703,6 @@
   return 0 && !frame_is_intra_only(&cpi->common);
 }
 
-// This function will copy the winner reference mode information from block
-// level (x->mbmi_ext) to frame level (cpi->mbmi_ext_frame_base). This frame
-// level buffer (cpi->mbmi_ext_frame_base) will be used during bitstream
-// preparation.
-static INLINE void copy_winner_ref_mode_from_mbmi_ext(MACROBLOCK *const x) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = xd->mi[0];
-  uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-  memcpy(x->mbmi_ext_frame->ref_mv_stack,
-         x->mbmi_ext->ref_mv_stack[ref_frame_type],
-         sizeof(x->mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
-  memcpy(x->mbmi_ext_frame->weight, x->mbmi_ext->weight[ref_frame_type],
-         sizeof(x->mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
-  x->mbmi_ext_frame->mode_context = x->mbmi_ext->mode_context[ref_frame_type];
-  x->mbmi_ext_frame->ref_mv_count = x->mbmi_ext->ref_mv_count[ref_frame_type];
-  memcpy(x->mbmi_ext_frame->global_mvs, x->mbmi_ext->global_mvs,
-         sizeof(x->mbmi_ext->global_mvs));
-}
-
 static void hybrid_intra_mode_search(AV1_COMP *cpi, MACROBLOCK *const x,
                                      RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                      PICK_MODE_CONTEXT *ctx) {
@@ -1677,7 +1674,12 @@
     }
   }
   // TODO(Ravi/Remya): Move this copy function to a better logical place
-  copy_winner_ref_mode_from_mbmi_ext(x);
+  // Copy the winner reference mode information from block level
+  // (x->mbmi_ext) to frame level (cpi->mbmi_ext_frame_base). This frame
+  // level buffer (cpi->mbmi_ext_frame_base) will be used during bitstream
+  // preparation.
+  av1_copy_mbmi_ext_to_mbmi_ext_frame(x->mbmi_ext_frame, x->mbmi_ext,
+                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
   x->rdmult = origin_mult;
 }
 
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 4e81806..902420d 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -940,7 +940,8 @@
 #endif  // CONFIG_INTERNAL_STATS
   ctx->mic = *xd->mi[0];
   ctx->skippable = x->force_skip;
-  ctx->mbmi_ext = *x->mbmi_ext;
+  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, x->mbmi_ext,
+                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
   ctx->comp_pred_diff = 0;
   ctx->hybrid_pred_diff = 0;
   ctx->single_pred_diff = 0;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index b381d95..8022137 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -905,7 +905,8 @@
   ctx->best_mode_index = mode_index;
 #endif  // CONFIG_INTERNAL_STATS
   ctx->mic = *xd->mi[0];
-  ctx->mbmi_ext = *x->mbmi_ext;
+  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, x->mbmi_ext,
+                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
   ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
@@ -2880,7 +2881,8 @@
   if (rd_cost->rate == INT_MAX) return;
 
   ctx->mic = *xd->mi[0];
-  ctx->mbmi_ext = *x->mbmi_ext;
+  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, x->mbmi_ext,
+                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
   av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
 }
 
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 821c485..c7c99ac 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -221,6 +221,22 @@
 
   return 0;
 }
+
+// Copies the ref_frame_type entry of mbmi_ext's ref MV stack, weights, mode
+// context and ref MV count, plus all global_mvs, into mbmi_ext_best.
+static INLINE void av1_copy_mbmi_ext_to_mbmi_ext_frame(
+    MB_MODE_INFO_EXT_FRAME *mbmi_ext_best,
+    const MB_MODE_INFO_EXT *const mbmi_ext, uint8_t ref_frame_type) {
+  memcpy(mbmi_ext_best->ref_mv_stack, mbmi_ext->ref_mv_stack[ref_frame_type],
+         sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
+  memcpy(mbmi_ext_best->weight, mbmi_ext->weight[ref_frame_type],
+         sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
+  mbmi_ext_best->mode_context = mbmi_ext->mode_context[ref_frame_type];
+  mbmi_ext_best->ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
+  memcpy(mbmi_ext_best->global_mvs, mbmi_ext->global_mvs,
+         sizeof(mbmi_ext->global_mvs));
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif