AV1 RT: Move MvCosts into a dynamically allocated structure

MvCosts is ~500k in size, and copying it for RT multithreading costs
about 100 usec per thread. Since RT runs with no MvCosts update, we
don't need a separate copy for each thread. Move it to dynamic
allocation so that a per-thread copy is made only if needed.
~2% speed-up for VGA with 2 threads at speed 8.

Change-Id: Iadfc2bf2f30b680685e433a07e18c88efbcc910e
diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c
index ff62c45..9d38e2d 100644
--- a/av1/encoder/av1_quantize.c
+++ b/av1/encoder/av1_quantize.c
@@ -730,9 +730,8 @@
   x->seg_skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
   x->qindex = qindex;
 
-  MvCosts *mv_costs = &x->mv_costs;
-  av1_set_error_per_bit(mv_costs, rdmult);
-  av1_set_sad_per_bit(cpi, mv_costs, qindex);
+  av1_set_error_per_bit(&x->errorperbit, rdmult);
+  av1_set_sad_per_bit(cpi, &x->sadperbit, qindex);
 }
 
 void av1_frame_init_quantizer(AV1_COMP *cpi) {
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 3c40085..f72d11a 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -751,16 +751,6 @@
  */
 typedef struct {
   /*****************************************************************************
-   * \name Rate to Distortion Multipliers
-   ****************************************************************************/
-  /**@{*/
-  //! A multiplier that converts mv cost to l2 error.
-  int errorperbit;
-  //! A multiplier that converts mv cost to l1 error.
-  int sadperbit;
-  /**@}*/
-
-  /*****************************************************************************
    * \name Encoding Costs
    * Here are the entropy costs needed to encode a given mv.
    * \ref nmv_cost_alloc and \ref nmv_cost_hp_alloc are two arrays that holds
@@ -953,12 +943,22 @@
 
   //! The rate needed to encode a new motion vector to the bitstream and some
   //! multipliers for motion search.
-  MvCosts mv_costs;
+  MvCosts *mv_costs;
 
   //! The rate needed to signal the txfm coefficients to the bitstream.
   CoeffCosts coeff_costs;
   /**@}*/
 
+  /*****************************************************************************
+   * \name Rate to Distortion Multipliers
+   ****************************************************************************/
+  /**@{*/
+  //! A multiplier that converts mv cost to l2 error.
+  int errorperbit;
+  //! A multiplier that converts mv cost to l1 error.
+  int sadperbit;
+  /**@}*/
+
   /******************************************************************************
    * \name Segmentation
    *****************************************************************************/
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index f2981c5..5c8d9a8 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1381,7 +1381,7 @@
 
   av1_frame_init_quantizer(cpi);
   av1_initialize_rd_consts(cpi);
-  av1_set_sad_per_bit(cpi, &x->mv_costs, quant_params->base_qindex);
+  av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
 
   init_encode_frame_mb_context(cpi);
   set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index 7223e7d..b832d91 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -26,7 +26,7 @@
                                       quant_params->y_dc_delta_q);
 }
 
-void av1_set_ssim_rdmult(const AV1_COMP *const cpi, MvCosts *const mv_costs,
+void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
                          const BLOCK_SIZE bsize, const int mi_row,
                          const int mi_col, int *const rdmult) {
   const AV1_COMMON *const cm = &cpi->common;
@@ -58,7 +58,7 @@
 
   *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
   *rdmult = AOMMAX(*rdmult, 0);
-  av1_set_error_per_bit(mv_costs, *rdmult);
+  av1_set_error_per_bit(errorperbit, *rdmult);
   aom_clear_system_state();
 }
 
@@ -131,7 +131,7 @@
   geom_mean_of_scale = exp(geom_mean_of_scale / base_block_count);
   int rdmult = (int)((double)orig_rdmult * geom_mean_of_scale + 0.5);
   rdmult = AOMMAX(rdmult, 0);
-  av1_set_error_per_bit(&x->mv_costs, rdmult);
+  av1_set_error_per_bit(&x->errorperbit, rdmult);
   aom_clear_system_state();
   if (bsize == cm->seq_params.sb_size) {
     const int rdmult_sb = set_deltaq_rdmult(cpi, x);
@@ -1371,7 +1371,7 @@
       // Checks for skip status of mv cost update.
       if (skip_mv_cost_update(cpi, tile_info, mi_row, mi_col)) break;
       av1_fill_mv_costs(xd->tile_ctx, cm->features.cur_frame_force_integer_mv,
-                        cm->features.allow_high_precision_mv, &x->mv_costs);
+                        cm->features.allow_high_precision_mv, x->mv_costs);
       break;
     default: assert(0);
   }
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index 4c193a5..2d164ab 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -268,7 +268,7 @@
                                    int mi_row, int mi_col);
 #endif  // !CONFIG_REALTIME_ONLY
 
-void av1_set_ssim_rdmult(const AV1_COMP *const cpi, MvCosts *const mv_costs,
+void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
                          const BLOCK_SIZE bsize, const int mi_row,
                          const int mi_col, int *const rdmult);
 
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 6d1da65..c45fe03 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -71,6 +71,12 @@
   if (!is_stat_generation_stage(cpi)) {
     alloc_token_info(cm, token_info);
   }
+  if (cpi->td.mb.mv_costs) {
+    aom_free(cpi->td.mb.mv_costs);
+    cpi->td.mb.mv_costs = NULL;
+  }
+  CHECK_MEM_ERROR(cm, cpi->td.mb.mv_costs,
+                  (MvCosts *)aom_calloc(1, sizeof(MvCosts)));
 
   av1_setup_shared_coeff_buffer(&cpi->common, &cpi->td.shared_coeff_buf);
   av1_setup_sms_tree(cpi, &cpi->td);
@@ -255,6 +261,11 @@
 
   release_obmc_buffers(&cpi->td.mb.obmc_buffer);
 
+  if (cpi->td.mb.mv_costs) {
+    aom_free(cpi->td.mb.mv_costs);
+    cpi->td.mb.mv_costs = NULL;
+  }
+
   aom_free(cpi->td.mb.inter_modes_info);
   cpi->td.mb.inter_modes_info = NULL;
 
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 2aa3dd0..ed934fb 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -779,6 +779,10 @@
       aom_free(thread_data->td->mb.txfm_search_info.txb_rd_records);
       thread_data->td->mb.txfm_search_info.txb_rd_records = NULL;
     }
+    if (thread_data->td != &cpi->td &&
+        cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
+      aom_free(thread_data->td->mb.mv_costs);
+    }
 
     // Accumulate counters.
     if (i > 0) {
@@ -797,6 +801,7 @@
 static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
                                            int num_workers) {
   MultiThreadInfo *const mt_info = &cpi->mt_info;
+  AV1_COMMON *const cm = &cpi->common;
   for (int i = num_workers - 1; i >= 0; i--) {
     AVxWorker *const worker = &mt_info->workers[i];
     EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
@@ -830,10 +835,16 @@
               thread_data->td->hash_value_buffer[x][y];
         }
       }
+      if (cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
+        CHECK_MEM_ERROR(cm, thread_data->td->mb.mv_costs,
+                        (MvCosts *)aom_malloc(sizeof(MvCosts)));
+        memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
+               sizeof(MvCosts));
+      }
     }
     if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
-      thread_data->td->mb.txfm_search_info.txb_rd_records =
-          (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords));
+      CHECK_MEM_ERROR(cm, thread_data->td->mb.txfm_search_info.txb_rd_records,
+                      (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords)));
     }
 
     if (thread_data->td->counts != &cpi->counts) {
@@ -861,6 +872,7 @@
 #if !CONFIG_REALTIME_ONLY
 static AOM_INLINE void fp_prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
                                               int num_workers) {
+  AV1_COMMON *const cm = &cpi->common;
   MultiThreadInfo *const mt_info = &cpi->mt_info;
   for (int i = num_workers - 1; i >= 0; i--) {
     AVxWorker *const worker = &mt_info->workers[i];
@@ -878,10 +890,16 @@
     // Before encoding a frame, copy the thread data from cpi.
     if (thread_data->td != &cpi->td) {
       thread_data->td->mb = cpi->td.mb;
+      if (cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
+        CHECK_MEM_ERROR(cm, thread_data->td->mb.mv_costs,
+                        (MvCosts *)aom_malloc(sizeof(MvCosts)));
+        memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
+               sizeof(MvCosts));
+      }
     }
     if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
-      thread_data->td->mb.txfm_search_info.txb_rd_records =
-          (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords));
+      CHECK_MEM_ERROR(cm, thread_data->td->mb.txfm_search_info.txb_rd_records,
+                      (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords)));
     }
   }
 }
@@ -1165,6 +1183,10 @@
   sync_enc_workers(&cpi->mt_info, cm, num_workers);
   for (int i = num_workers - 1; i >= 0; i--) {
     EncWorkerData *const thread_data = &cpi->mt_info.tile_thr_data[i];
+    if (thread_data->td != &cpi->td &&
+        cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
+      aom_free(thread_data->td->mb.mv_costs);
+    }
     if (thread_data->td->mb.txfm_search_info.txb_rd_records) {
       aom_free(thread_data->td->mb.txfm_search_info.txb_rd_records);
     }
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 225263c..f80fc48 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -34,12 +34,13 @@
 
 static INLINE void init_mv_cost_params(MV_COST_PARAMS *mv_cost_params,
                                        const MvCosts *mv_costs,
-                                       const MV *ref_mv) {
+                                       const MV *ref_mv, int errorperbit,
+                                       int sadperbit) {
   mv_cost_params->ref_mv = ref_mv;
   mv_cost_params->full_ref_mv = get_fullmv_from_mv(ref_mv);
   mv_cost_params->mv_cost_type = MV_COST_ENTROPY;
-  mv_cost_params->error_per_bit = mv_costs->errorperbit;
-  mv_cost_params->sad_per_bit = mv_costs->sadperbit;
+  mv_cost_params->error_per_bit = errorperbit;
+  mv_cost_params->sad_per_bit = sadperbit;
   mv_cost_params->mvjcost = mv_costs->nmv_joint_cost;
   mv_cost_params->mvcost[0] = mv_costs->mv_cost_stack[0];
   mv_cost_params->mvcost[1] = mv_costs->mv_cost_stack[1];
@@ -133,7 +134,8 @@
   av1_set_mv_search_range(&ms_params->mv_limits, ref_mv);
 
   // Mvcost params
-  init_mv_cost_params(&ms_params->mv_cost_params, &x->mv_costs, ref_mv);
+  init_mv_cost_params(&ms_params->mv_cost_params, x->mv_costs, ref_mv,
+                      x->errorperbit, x->sadperbit);
 }
 
 void av1_make_default_subpel_ms_params(SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
@@ -150,7 +152,8 @@
   av1_set_subpel_mv_search_range(&ms_params->mv_limits, &x->mv_limits, ref_mv);
 
   // Mvcost params
-  init_mv_cost_params(&ms_params->mv_cost_params, &x->mv_costs, ref_mv);
+  init_mv_cost_params(&ms_params->mv_cost_params, x->mv_costs, ref_mv,
+                      x->errorperbit, x->sadperbit);
 
   // Subpel variance params
   ms_params->var_params.vfp = &cpi->fn_ptr[bsize];
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 7e9e12e..bad8e65 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -132,7 +132,7 @@
       av1_get_scaled_ref_frame(cpi, ref);
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
-  const MvCosts *mv_costs = &x->mv_costs;
+  const MvCosts *mv_costs = x->mv_costs;
 
   if (scaled_ref_frame) {
     // Swap out the reference frame for a version that's been scaled to
@@ -385,7 +385,7 @@
   assert(has_second_ref(mbmi));
   const int_mv init_mv[2] = { cur_mv[0], cur_mv[1] };
   const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
-  const MvCosts *mv_costs = &x->mv_costs;
+  const MvCosts *mv_costs = x->mv_costs;
   int_mv ref_mv[2];
   int ite, ref;
 
@@ -586,7 +586,7 @@
   const int ref = mbmi->ref_frame[ref_idx];
   const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
   struct macroblockd_plane *const pd = &xd->plane[0];
-  const MvCosts *mv_costs = &x->mv_costs;
+  const MvCosts *mv_costs = x->mv_costs;
 
   struct buf_2d backup_yv12[MAX_MB_PLANE];
   const YV12_BUFFER_CONFIG *const scaled_ref_frame =
diff --git a/av1/encoder/mv_prec.h b/av1/encoder/mv_prec.h
index 8564226..89f95f5 100644
--- a/av1/encoder/mv_prec.h
+++ b/av1/encoder/mv_prec.h
@@ -32,7 +32,7 @@
 static AOM_INLINE void av1_set_high_precision_mv(
     AV1_COMP *cpi, int allow_high_precision_mv,
     int cur_frame_force_integer_mv) {
-  MvCosts *const mv_costs = &cpi->td.mb.mv_costs;
+  MvCosts *const mv_costs = cpi->td.mb.mv_costs;
   const int copy_hp = cpi->common.features.allow_high_precision_mv =
       allow_high_precision_mv && !cur_frame_force_integer_mv;
 
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index c8b1c52..5831881 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -203,8 +203,8 @@
   // calculate the bit cost on motion vector
   MV mvp_full = get_mv_from_fullmv(&tmp_mv->as_fullmv);
 
-  *rate_mv = av1_mv_bit_cost(&mvp_full, &ref_mv, x->mv_costs.nmv_joint_cost,
-                             x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+  *rate_mv = av1_mv_bit_cost(&mvp_full, &ref_mv, x->mv_costs->nmv_joint_cost,
+                             x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
 
   // TODO(kyslov) Account for Rate Mode!
   rv = !(RDCOST(x->rdmult, (*rate_mv), 0) > best_rd_sofar);
@@ -219,8 +219,8 @@
         &x->pred_sse[ref], NULL);
 
     *rate_mv =
-        av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->mv_costs.nmv_joint_cost,
-                        x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+        av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->mv_costs->nmv_joint_cost,
+                        x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
   }
 
   if (scaled_ref_frame) {
@@ -290,8 +290,8 @@
     MV ref_mv = av1_get_ref_mv(x, 0).as_mv;
 
     *rate_mv = av1_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, &ref_mv,
-                               x->mv_costs.nmv_joint_cost,
-                               x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+                               x->mv_costs->nmv_joint_cost,
+                               x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
     frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
     frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
 
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index ffe0a95..da79d5b 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -430,7 +430,8 @@
   }
 
   if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM) {
-    av1_set_ssim_rdmult(cpi, &x->mv_costs, bsize, mi_row, mi_col, &x->rdmult);
+    av1_set_ssim_rdmult(cpi, &x->errorperbit, bsize, mi_row, mi_col,
+                        &x->rdmult);
   }
 #if CONFIG_TUNE_VMAF
   if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_WITHOUT_PREPROCESSING ||
@@ -661,7 +662,7 @@
   const int orig_rdmult = x->rdmult;
   setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
   // Set error per bit for current rdmult
-  av1_set_error_per_bit(&x->mv_costs, x->rdmult);
+  av1_set_error_per_bit(&x->errorperbit, x->rdmult);
   av1_rd_cost_update(x->rdmult, &best_rd);
 
   // Find best coding mode & reconstruct the MB so it is available
@@ -1881,7 +1882,7 @@
   const int orig_rdmult = x->rdmult;
   setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode, mbmi);
   // Set error per bit for current rdmult
-  av1_set_error_per_bit(&x->mv_costs, x->rdmult);
+  av1_set_error_per_bit(&x->errorperbit, x->rdmult);
   // Find best coding mode & reconstruct the MB so it is available
   // as a predictor for MBs that follow in the SB
   if (frame_is_intra_only(cm)) {
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 6bceddb..067028d 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -432,11 +432,11 @@
   return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
 }
 
-void av1_set_sad_per_bit(const AV1_COMP *cpi, MvCosts *mv_costs, int qindex) {
+void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
   switch (cpi->common.seq_params.bit_depth) {
-    case AOM_BITS_8: mv_costs->sadperbit = sad_per_bit_lut_8[qindex]; break;
-    case AOM_BITS_10: mv_costs->sadperbit = sad_per_bit_lut_10[qindex]; break;
-    case AOM_BITS_12: mv_costs->sadperbit = sad_per_bit_lut_12[qindex]; break;
+    case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
+    case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
+    case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
     default:
       assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
   }
@@ -581,7 +581,7 @@
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
   RD_OPT *const rd = &cpi->rd;
-  MvCosts *mv_costs = &x->mv_costs;
+  MvCosts *mv_costs = x->mv_costs;
   int use_nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
   CostUpdateFreq cost_upd_freq = cpi->oxcf.cost_upd_freq;
   int fill_costs =
@@ -593,7 +593,7 @@
   rd->RDMULT = av1_compute_rd_mult(
       cpi, cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q);
 
-  av1_set_error_per_bit(mv_costs, rd->RDMULT);
+  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);
 
   set_block_thresholds(cm, rd);
 
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 5c7397c..409e8ea 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -231,7 +231,7 @@
 void av1_initialize_rd_consts(struct AV1_COMP *cpi);
 
 // Sets the multiplier to convert mv cost to l1 error during motion search.
-void av1_set_sad_per_bit(const struct AV1_COMP *cpi, MvCosts *mv_costs,
+void av1_set_sad_per_bit(const struct AV1_COMP *cpi, int *sadperbit,
                          int qindex);
 
 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
@@ -281,8 +281,8 @@
                  BLOCK_SIZE block_size);
 
 // Sets the multiplier to convert mv cost to l2 error during motion search.
-static INLINE void av1_set_error_per_bit(MvCosts *mv_costs, int rdmult) {
-  mv_costs->errorperbit = AOMMAX(rdmult >> RD_EPB_SHIFT, 1);
+static INLINE void av1_set_error_per_bit(int *errorperbit, int rdmult) {
+  *errorperbit = AOMMAX(rdmult >> RD_EPB_SHIFT, 1);
 }
 
 // Get the threshold for R-D optimization of coefficients depending upon mode
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 2a02842..dff7276 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1087,8 +1087,8 @@
       for (int i = 0; i < 2; ++i) {
         const int_mv ref_mv = av1_get_ref_mv(x, i);
         *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
-                                    x->mv_costs.nmv_joint_cost,
-                                    x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+                                    x->mv_costs->nmv_joint_cost,
+                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
       }
     } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
       if (valid_mv1) {
@@ -1097,8 +1097,8 @@
       }
       const int_mv ref_mv = av1_get_ref_mv(x, 1);
       *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
-                                 x->mv_costs.nmv_joint_cost,
-                                 x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+                                 x->mv_costs->nmv_joint_cost,
+                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
     } else {
       assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
       if (valid_mv0) {
@@ -1107,8 +1107,8 @@
       }
       const int_mv ref_mv = av1_get_ref_mv(x, 0);
       *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
-                                 x->mv_costs.nmv_joint_cost,
-                                 x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+                                 x->mv_costs->nmv_joint_cost,
+                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
     }
   } else {
     // Single ref case.
@@ -1420,8 +1420,8 @@
           if (mv0.as_int != mbmi->mv[0].as_int) {
             // Keep the refined MV and WM parameters.
             tmp_rate_mv = av1_mv_bit_cost(
-                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost,
-                x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
+                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
             tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
           } else {
             // Restore the old MV and WM parameters.
@@ -2304,8 +2304,8 @@
         const int compare_cost = mode_info[i].rate_mv + mode_info[i].drl_cost;
         const int_mv ref_mv = av1_get_ref_mv(x, 0);
         this_rate_mv = av1_mv_bit_cost(
-            &mode_info[i].mv.as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost,
-            x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+            &mode_info[i].mv.as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
+            x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
         const int this_cost = this_rate_mv + drl_cost;
 
         if (compare_cost <= this_cost) {
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 58142cc..fc9d420 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -1194,9 +1194,9 @@
   // is used somewhere unexpectedly. Should be resolved later.
   // Initialize errorperbit and sadperbit
   const int rdmult = av1_compute_rd_mult_based_on_qindex(cpi, TF_QINDEX);
-  MvCosts *mv_costs = &cpi->td.mb.mv_costs;
-  av1_set_error_per_bit(mv_costs, rdmult);
-  av1_set_sad_per_bit(cpi, mv_costs, TF_QINDEX);
+  MvCosts *mv_costs = cpi->td.mb.mv_costs;
+  av1_set_error_per_bit(&cpi->td.mb.errorperbit, rdmult);
+  av1_set_sad_per_bit(cpi, &cpi->td.mb.sadperbit, TF_QINDEX);
   av1_fill_mv_costs(cpi->common.fc,
                     cpi->common.features.cur_frame_force_integer_mv,
                     cpi->common.features.allow_high_precision_mv, mv_costs);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index c1ac34d..0e82627 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -1107,9 +1107,8 @@
   // Get rd multiplier set up.
   rdmult = (int)av1_compute_rd_mult(cpi, base_qindex);
   if (rdmult < 1) rdmult = 1;
-  MvCosts *mv_costs = &x->mv_costs;
-  av1_set_error_per_bit(mv_costs, rdmult);
-  av1_set_sad_per_bit(cpi, mv_costs, base_qindex);
+  av1_set_error_per_bit(&x->errorperbit, rdmult);
+  av1_set_sad_per_bit(cpi, &x->sadperbit, base_qindex);
 
   tpl_frame->is_valid = 1;
 
diff --git a/av1/encoder/tune_vmaf.c b/av1/encoder/tune_vmaf.c
index 03f6d8d..a1afa21 100644
--- a/av1/encoder/tune_vmaf.c
+++ b/av1/encoder/tune_vmaf.c
@@ -917,7 +917,7 @@
 
   *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
   *rdmult = AOMMAX(*rdmult, 0);
-  av1_set_error_per_bit(&x->mv_costs, *rdmult);
+  av1_set_error_per_bit(&x->errorperbit, *rdmult);
   aom_clear_system_state();
 }