Modify frame level structure for mbmi_ext_base

Introduce a new frame-level structure, MB_MODE_INFO_EXT_FRAME (for
mbmi_ext_base), which stores the refmvs, weight, mode_context and
refmv_count of only the winner reference mode instead of those of all
possible reference modes (MODE_CTX_REF_FRAMES). This structure is used
during the bitstream preparation stage.

The memory footprint is reduced while encoding time remains similar.

Resolution    Tile     Memory reduction
                       Single   Multi
                       Thread   Thread
832x480       2x1      ~19%     ~14% (2 threads)
1280x720      2x2      ~30%     ~16% (4 threads)
1920x1080     4x2      ~37%     ~18% (8 threads)

Memory measuring command:
$ command time -v ./aomenc ...

Change-Id: Ief3b3832b79acbf93dab8674fa4cac9bd26770b8
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index b13d16f..3f24901 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -96,17 +96,16 @@
 }
 
 static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
-                          const MB_MODE_INFO_EXT *mbmi_ext, aom_writer *w) {
-  uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
-
+                          const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
+                          aom_writer *w) {
   assert(mbmi->ref_mv_idx < 3);
 
   const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV;
   if (new_mv) {
     int idx;
     for (idx = 0; idx < 2; ++idx) {
-      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
+      if (mbmi_ext_frame->ref_mv_count > idx + 1) {
+        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext_frame->weight, idx);
 
         aom_write_symbol(w, mbmi->ref_mv_idx != idx, ec_ctx->drl_cdf[drl_ctx],
                          2);
@@ -120,8 +119,8 @@
     int idx;
     // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
     for (idx = 1; idx < 3; ++idx) {
-      if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
-        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
+      if (mbmi_ext_frame->ref_mv_count > idx + 1) {
+        uint8_t drl_ctx = av1_drl_ctx(mbmi_ext_frame->weight, idx);
         aom_write_symbol(w, mbmi->ref_mv_idx != (idx - 1),
                          ec_ctx->drl_cdf[drl_ctx], 2);
         if (mbmi->ref_mv_idx == (idx - 1)) return;
@@ -369,9 +368,9 @@
   if (tx_size == plane_tx_size || plane) {
     const CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff;
     const int txb_offset =
-        x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
+        x->mbmi_ext_frame->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
     const tran_low_t *tcoeff_txb =
-        cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
+        cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset;
     const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
     const uint8_t *txb_skip_ctx_txb =
         cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
@@ -1032,27 +1031,22 @@
 }
 
 static INLINE int16_t mode_context_analyzer(
-    const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf) {
-  const int8_t ref_frame = av1_ref_frame_type(rf);
+    const int16_t mode_context, const MV_REFERENCE_FRAME *const rf) {
+  if (rf[1] <= INTRA_FRAME) return mode_context;
 
-  if (rf[1] <= INTRA_FRAME) return mode_context[ref_frame];
-
-  const int16_t newmv_ctx = mode_context[ref_frame] & NEWMV_CTX_MASK;
-  const int16_t refmv_ctx =
-      (mode_context[ref_frame] >> REFMV_OFFSET) & REFMV_CTX_MASK;
+  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
+  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
 
   const int16_t comp_ctx = compound_mode_ctx_map[refmv_ctx >> 1][AOMMIN(
       newmv_ctx, COMP_NEWMV_CTXS - 1)];
   return comp_ctx;
 }
 
-static INLINE int_mv get_ref_mv_from_stack(int ref_idx,
-                                           const MV_REFERENCE_FRAME *ref_frame,
-                                           int ref_mv_idx,
-                                           const MB_MODE_INFO_EXT *mbmi_ext) {
+static INLINE int_mv get_ref_mv_from_stack(
+    int ref_idx, const MV_REFERENCE_FRAME *ref_frame, int ref_mv_idx,
+    const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame) {
   const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
-  const CANDIDATE_MV *curr_ref_mv_stack =
-      mbmi_ext->ref_mv_stack[ref_frame_type];
+  const CANDIDATE_MV *curr_ref_mv_stack = mbmi_ext_frame->ref_mv_stack;
 
   if (ref_frame[1] > INTRA_FRAME) {
     assert(ref_idx == 0 || ref_idx == 1);
@@ -1061,9 +1055,9 @@
   }
 
   assert(ref_idx == 0);
-  return ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]
+  return ref_mv_idx < mbmi_ext_frame->ref_mv_count
              ? curr_ref_mv_stack[ref_mv_idx].this_mv
-             : mbmi_ext->global_mvs[ref_frame_type];
+             : mbmi_ext_frame->global_mvs[ref_frame_type];
 }
 
 static INLINE int_mv get_ref_mv(const MACROBLOCK *x, int ref_idx) {
@@ -1075,7 +1069,7 @@
     ref_mv_idx += 1;
   }
   return get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
-                               x->mbmi_ext);
+                               x->mbmi_ext_frame);
 }
 
 static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row,
@@ -1087,7 +1081,7 @@
   const struct segmentation *const seg = &cm->seg;
   struct segmentation_probs *const segp = &ec_ctx->seg;
   const MB_MODE_INFO *const mbmi = xd->mi[0];
-  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+  const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_frame = x->mbmi_ext_frame;
   const PREDICTION_MODE mode = mbmi->mode;
   const int segment_id = mbmi->segment_id;
   const BLOCK_SIZE bsize = mbmi->sb_type;
@@ -1123,7 +1117,8 @@
 
     write_ref_frames(cm, xd, w);
 
-    mode_ctx = mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
+    mode_ctx =
+        mode_context_analyzer(mbmi_ext_frame->mode_context, mbmi->ref_frame);
 
     // If segment skip is not enabled code the mode.
     if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
@@ -1133,7 +1128,7 @@
         write_inter_mode(w, mode, ec_ctx, mode_ctx);
 
       if (mode == NEWMV || mode == NEW_NEWMV || have_nearmv_in_inter_mode(mode))
-        write_drl_idx(ec_ctx, mbmi, mbmi_ext, w);
+        write_drl_idx(ec_ctx, mbmi, mbmi_ext_frame, w);
       else
         assert(mbmi->ref_mv_idx == 0);
     }
@@ -1235,7 +1230,7 @@
 }
 
 static void write_intrabc_info(MACROBLOCKD *xd,
-                               const MB_MODE_INFO_EXT *mbmi_ext,
+                               const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
                                aom_writer *w) {
   const MB_MODE_INFO *const mbmi = xd->mi[0];
   int use_intrabc = is_intrabc_block(mbmi);
@@ -1245,13 +1240,13 @@
     assert(mbmi->mode == DC_PRED);
     assert(mbmi->uv_mode == UV_DC_PRED);
     assert(mbmi->motion_mode == SIMPLE_TRANSLATION);
-    int_mv dv_ref = mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv;
+    int_mv dv_ref = mbmi_ext_frame->ref_mv_stack[0].this_mv;
     av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc);
   }
 }
 
 static void write_mb_modes_kf(AV1_COMP *cpi, MACROBLOCKD *xd,
-                              const MB_MODE_INFO_EXT *mbmi_ext,
+                              const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
                               const int mi_row, const int mi_col,
                               aom_writer *w) {
   AV1_COMMON *const cm = &cpi->common;
@@ -1273,7 +1268,7 @@
   write_delta_q_params(cpi, mi_row, mi_col, skip, w);
 
   if (av1_allow_intrabc(cm)) {
-    write_intrabc_info(xd, mbmi_ext, w);
+    write_intrabc_info(xd, mbmi_ext_frame, w);
     if (is_intrabc_block(mbmi)) return;
   }
 
@@ -1319,10 +1314,10 @@
 #if ENC_MISMATCH_DEBUG
 static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) {
   AV1_COMMON *const cm = &cpi->common;
-  const MB_MODE_INFO *const *mbmi =
+  const MB_MODE_INFO *const mbmi =
       *(cm->mi_grid_base + (mi_row * cm->mi_stride + mi_col));
-  const MB_MODE_INFO_EXT *const *mbmi_ext =
-      cpi->mbmi_ext_base + get_mi_ext_idx(cm, mi_row, mi_col);
+  const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_frame =
+      cpi->mbmi_ext_frame_base + get_mi_ext_idx(cm, mi_row, mi_col);
   if (is_inter_block(mbmi)) {
 #define FRAME_TO_CHECK 11
     if (cm->current_frame.frame_number == FRAME_TO_CHECK &&
@@ -1340,9 +1335,9 @@
       }
 
       const int16_t mode_ctx =
-          is_comp_ref
-              ? 0
-              : mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
+          is_comp_ref ? 0
+                      : mode_context_analyzer(mbmi_ext_frame->mode_context,
+                                              mbmi->ref_frame);
 
       const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK;
       int16_t zeromv_ctx = -1;
@@ -1376,7 +1371,7 @@
   MB_MODE_INFO *m = xd->mi[0];
 
   if (frame_is_intra_only(cm)) {
-    write_mb_modes_kf(cpi, xd, cpi->td.mb.mbmi_ext, mi_row, mi_col, w);
+    write_mb_modes_kf(cpi, xd, cpi->td.mb.mbmi_ext_frame, mi_row, mi_col, w);
   } else {
     // has_subpel_mv_component needs the ref frame buffers set up to look
     // up if they are scaled. has_subpel_mv_component is in turn needed by
@@ -1508,7 +1503,8 @@
   const AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
   xd->mi = cm->mi_grid_base + (mi_row * cm->mi_stride + mi_col);
-  cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + get_mi_ext_idx(cm, mi_row, mi_col);
+  cpi->td.mb.mbmi_ext_frame =
+      cpi->mbmi_ext_frame_base + get_mi_ext_idx(cm, mi_row, mi_col);
 
   const MB_MODE_INFO *mbmi = xd->mi[0];
   const BLOCK_SIZE bsize = mbmi->sb_type;
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 503889e..468de1a 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -87,11 +87,24 @@
   CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
   uint16_t weight[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
   int_mv global_mvs[REF_FRAMES];
+  // TODO(Ravi/Remya): Check if this variable is still needed at block level
   int cb_offset;
   int16_t mode_context[MODE_CTX_REF_FRAMES];
   uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
 } MB_MODE_INFO_EXT;
 
+// Frame-level record of the winner reference mode's information, filled from
+// MB_MODE_INFO_EXT and consumed during the bitstream preparation stage.
+typedef struct {
+  CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE];  // winner's ref MV stack
+  uint16_t weight[MAX_REF_MV_STACK_SIZE];            // winner's stack weights
+  // TODO(Ravi/Remya): Reduce the buffer size of global_mvs
+  int_mv global_mvs[REF_FRAMES];
+  int cb_offset;         // copy of x->cb_offset for this block
+  int16_t mode_context;  // winner reference type's mode context
+  uint8_t ref_mv_count;  // winner reference type's ref MV count
+} MB_MODE_INFO_EXT_FRAME;
+
 typedef struct {
   int col_min;
   int col_max;
@@ -226,6 +239,7 @@
 
   MACROBLOCKD e_mbd;
   MB_MODE_INFO_EXT *mbmi_ext;
+  MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame;
   int skip_block;
   int qindex;
 
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index e15f879..8803cac 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -999,7 +999,6 @@
 
   xd->mi = cm->mi_grid_base;
   xd->mi[0] = cm->mi;
-  x->mbmi_ext = cpi->mbmi_ext_base;
 }
 
 // Apply temporal filtering to key frames and encode the filtered frame.
@@ -1046,7 +1045,8 @@
     setup_mi(cpi, frame_input->source);
     av1_init_macroblockd(cm, xd, NULL);
     const int ext_mi_size = cm->mi_alloc_rows * cm->mi_alloc_cols;
-    memset(cpi->mbmi_ext_base, 0, ext_mi_size * sizeof(*cpi->mbmi_ext_base));
+    memset(cpi->mbmi_ext_frame_base, 0,
+           ext_mi_size * sizeof(*cpi->mbmi_ext_frame_base));
 
     av1_set_speed_features_framesize_independent(cpi, oxcf->speed);
     av1_set_speed_features_framesize_dependent(cpi, oxcf->speed);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 630ab96..4e587a7 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -577,6 +577,25 @@
   return 0 && !frame_is_intra_only(&cpi->common);
 }
 
+// Copies the winner reference mode's ref MV stack, weights, mode context and
+// ref MV count, plus all global MVs, from the block-level buffer
+// (x->mbmi_ext) into the frame-level entry (x->mbmi_ext_frame) that is read
+// during bitstream preparation.
+static INLINE void copy_winner_ref_mode_from_mbmi_ext(MACROBLOCK *const x) {
+  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
+  const MB_MODE_INFO_EXT *const src = x->mbmi_ext;
+  MB_MODE_INFO_EXT_FRAME *const dst = x->mbmi_ext_frame;
+  const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+  // Size every copy by its destination array so the frame-level entry can
+  // never be overrun, even if the two structures' dimensions diverge.
+  memcpy(dst->ref_mv_stack, src->ref_mv_stack[ref_frame_type],
+         sizeof(dst->ref_mv_stack));
+  memcpy(dst->weight, src->weight[ref_frame_type], sizeof(dst->weight));
+  dst->mode_context = src->mode_context[ref_frame_type];
+  dst->ref_mv_count = src->ref_mv_count[ref_frame_type];
+  memcpy(dst->global_mvs, src->global_mvs, sizeof(dst->global_mvs));
+}
+
 static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
                           MACROBLOCK *const x, int mi_row, int mi_col,
                           RD_STATS *rd_cost, PARTITION_TYPE partition,
@@ -1441,6 +1460,7 @@
 
   if (!dry_run) {
     x->mbmi_ext->cb_offset = x->cb_offset;
+    x->mbmi_ext_frame->cb_offset = x->cb_offset;
     assert(x->cb_offset <
            (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size]));
   }
@@ -1522,7 +1542,8 @@
       update_stats(&cpi->common, td, mi_row, mi_col);
     }
   }
-
+  // TODO(Ravi/Remya): Move this copy function to a better logical place
+  copy_winner_ref_mode_from_mbmi_ext(x);
   x->rdmult = origin_mult;
 }
 
@@ -3912,6 +3933,7 @@
   const int mib_size = cm->seq_params.mib_size;
   const int mib_size_log2 = cm->seq_params.mib_size_log2;
   const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
+  int sb_mi_size = av1_get_sb_mi_size(cm);
 
 #if CONFIG_COLLECT_COMPONENT_TIMING
   start_timing(cpi, encode_sb_time);
@@ -3933,6 +3955,7 @@
   // Code each SB in the row
   for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
        mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
+    memset(x->mbmi_ext, 0, sb_mi_size * sizeof(*x->mbmi_ext));
     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
                                    sb_col_in_tile);
     if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index f2ea5bf..1cca1a5 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -458,9 +458,9 @@
 }
 
 static void dealloc_context_buffers_ext(AV1_COMP *cpi) {
-  if (cpi->mbmi_ext_base) {
-    aom_free(cpi->mbmi_ext_base);
-    cpi->mbmi_ext_base = NULL;
+  if (cpi->mbmi_ext_frame_base) {
+    aom_free(cpi->mbmi_ext_frame_base);
+    cpi->mbmi_ext_frame_base = NULL;
   }
 }
 
@@ -470,8 +470,9 @@
 
   if (new_ext_mi_size > cpi->mi_ext_alloc_size) {
     dealloc_context_buffers_ext(cpi);
-    CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base,
-                    aom_calloc(new_ext_mi_size, sizeof(*cpi->mbmi_ext_base)));
+    CHECK_MEM_ERROR(
+        cm, cpi->mbmi_ext_frame_base,
+        aom_calloc(new_ext_mi_size, sizeof(*cpi->mbmi_ext_frame_base)));
     cpi->mi_ext_alloc_size = new_ext_mi_size;
   }
 }
@@ -577,6 +578,9 @@
   aom_free(cm->tpl_mvs);
   cm->tpl_mvs = NULL;
 
+  aom_free(cpi->td.mb.mbmi_ext);
+  cpi->td.mb.mbmi_ext = NULL;
+
   av1_free_ref_frame_buffers(cm->buffer_pool);
   av1_free_txb_buf(cpi);
   av1_free_context_buffers(cm);
@@ -988,7 +992,8 @@
 
   const int ext_mi_size = cm->mi_alloc_rows * cm->mi_alloc_cols;
   alloc_context_buffers_ext(cpi);
-  memset(cpi->mbmi_ext_base, 0, ext_mi_size * sizeof(*cpi->mbmi_ext_base));
+  memset(cpi->mbmi_ext_frame_base, 0,
+         ext_mi_size * sizeof(*cpi->mbmi_ext_frame_base));
   set_tile_info(cpi);
 }
 
@@ -2789,6 +2794,8 @@
   }
 #endif
 
+  int sb_mi_size = av1_get_sb_mi_size(cm);
+
   CHECK_MEM_ERROR(
       cm, cpi->td.mb.above_pred_buf,
       (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE *
@@ -2819,6 +2826,9 @@
                   (int32_t *)aom_memalign(
                       16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf)));
 
+  CHECK_MEM_ERROR(cm, cpi->td.mb.mbmi_ext,
+                  aom_calloc(sb_mi_size, sizeof(*cpi->td.mb.mbmi_ext)));
+
   av1_set_speed_features_framesize_independent(cpi, oxcf->speed);
   av1_set_speed_features_framesize_dependent(cpi, oxcf->speed);
 
@@ -3227,6 +3237,7 @@
       aom_free(thread_data->td->mask_buf);
       aom_free(thread_data->td->counts);
       av1_free_pc_tree(thread_data->td, num_planes);
+      aom_free(thread_data->td->mbmi_ext);
       aom_free(thread_data->td);
     }
   }
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 87f70de..d511c51 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -627,6 +627,7 @@
   int intrabc_used;
   int deltaq_used;
   FRAME_CONTEXT *tctx;
+  MB_MODE_INFO_EXT *mbmi_ext;
 } ThreadData;
 
 struct EncWorkerData;
@@ -744,7 +745,7 @@
   QUANTS quants;
   ThreadData td;
   FRAME_COUNTS counts;
-  MB_MODE_INFO_EXT *mbmi_ext_base;
+  MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame_base;
   CB_COEFF_BUFFER *coeff_buffer_base;
   Dequants dequants;
   AV1_COMMON common;
@@ -1365,7 +1366,7 @@
   xd->mi = cm->mi_grid_base + grid_idx;
   xd->mi[0] = cm->mi + mi_idx;
 
-  x->mbmi_ext = cpi->mbmi_ext_base + ext_idx;
+  x->mbmi_ext_frame = cpi->mbmi_ext_frame_base + ext_idx;
 }
 
 // Check to see if the given partition size is allowed for a specified number
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 8431f4b..20956a8 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -640,9 +640,9 @@
   MACROBLOCKD *xd = &x->e_mbd;
   const CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff;
   const int txb_offset =
-      x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
+      x->mbmi_ext_frame->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
   const tran_low_t *tcoeff_txb =
-      cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
+      cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset;
   const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
   const uint8_t *txb_skip_ctx_txb =
       cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
@@ -2042,7 +2042,7 @@
 
   CB_COEFF_BUFFER *cb_coef_buff = x->cb_coef_buff;
   const int txb_offset =
-      x->mbmi_ext->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
+      x->mbmi_ext_frame->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
   uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
   uint8_t *txb_skip_ctx_txb = cb_coef_buff->txb_skip_ctx[plane] + txb_offset;
   txb_skip_ctx_txb[block] = txb_ctx.txb_skip_ctx;
@@ -2053,7 +2053,8 @@
     return;
   }
 
-  tran_low_t *tcoeff_txb = cb_coef_buff->tcoeff[plane] + x->mbmi_ext->cb_offset;
+  tran_low_t *tcoeff_txb =
+      cb_coef_buff->tcoeff[plane] + x->mbmi_ext_frame->cb_offset;
   tran_low_t *tcoeff = BLOCK_OFFSET(tcoeff_txb, block);
   const int segment_id = mbmi->segment_id;
   const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size);
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 13b5b76..18a2ef3 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -377,6 +377,7 @@
 static void create_enc_workers(AV1_COMP *cpi, int num_workers) {
   AV1_COMMON *const cm = &cpi->common;
   const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+  int sb_mi_size = av1_get_sb_mi_size(cm);
 
   CHECK_MEM_ERROR(cm, cpi->workers,
                   aom_malloc(num_workers * sizeof(*cpi->workers)));
@@ -467,6 +468,10 @@
                                  sizeof(*thread_data->td->tmp_obmc_bufs[j])));
       }
 
+      CHECK_MEM_ERROR(
+          cm, thread_data->td->mbmi_ext,
+          aom_calloc(sb_mi_size, sizeof(*thread_data->td->mbmi_ext)));
+
       // Create threads
       if (!winterface->reset(worker))
         aom_internal_error(&cm->error, AOM_CODEC_ERROR,
@@ -567,6 +572,7 @@
         }
       }
       thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
+      thread_data->td->mb.mbmi_ext = thread_data->td->mbmi_ext;
     }
     if (thread_data->td->counts != &cpi->counts) {
       memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts));
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 8031bfc..99358c7 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -192,6 +192,19 @@
   return (a - b);
 }
 
+// Returns how many mi allocation units (of size cm->mi_alloc_bsize) one
+// superblock spans, computed as the per-side count squared.
+static INLINE int av1_get_sb_mi_size(const AV1_COMMON *const cm) {
+  const int sb_mi_1d = mi_size_wide[cm->seq_params.sb_size];
+  const int alloc_mi_1d = mi_size_wide[cm->mi_alloc_bsize];
+  // Ceiling division: allocation units needed along one superblock side.
+  const int units_1d = (sb_mi_1d + alloc_mi_1d - 1) / alloc_mi_1d;
+
+  // Squaring the per-side count is only valid for square superblocks.
+  assert(sb_mi_1d == mi_size_high[cm->seq_params.sb_size]);
+  return units_1d * units_1d;
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif