Allocate dv_costs when required

The structure 'dv_costs' holds the cost of signalling the dv in
the bitstream when intraBC mode is used. This patch allocates
'dv_costs' only when intraBC mode needs to be evaluated. As
'dv_costs' is not used during the first pass, the code related
to its allocation is removed from fp_prepare_enc_workers().

For AVIF image encoding with speed = 9:

             Heap memory reduction (%)
Resolution   threads=1    threads=4
640x360        3.25         2.64
768x512        2.06         2.28
832x480        2.14         1.84
1280x720       1.03         0.92

Heap memory reduction was measured using the following command:
$ valgrind --tool=massif ./avifenc ...

Change-Id: If40abe0bde968edc067b56143e9320078f57dad9
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 6279c66..ad38eb5 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -27,6 +27,7 @@
 #include "av1/common/blockd.h"
 #include "av1/common/entropymode.h"
 #include "av1/common/enums.h"
+#include "av1/common/reconintra.h"
 #include "av1/common/resize.h"
 #include "av1/common/thread_common.h"
 #include "av1/common/timing.h"
@@ -3731,6 +3732,12 @@
            (cpi->compressor_stage == ENCODE_STAGE) && cpi->ppi->lap_enabled));
 }
 
+// Returns true if 'dv_costs' needs to be allocated/stored during encoding.
+static AOM_INLINE bool av1_need_dv_costs(const AV1_COMP *const cpi) {
+  return !cpi->sf.rt_sf.use_nonrd_pick_mode &&
+         av1_allow_intrabc(&cpi->common) && !is_stat_generation_stage(cpi);
+}
+
 /*!\endcond */
 /*!\brief Check if the current stage has statistics
  *
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 85278f6..89784c5 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -74,13 +74,6 @@
                     (MvCosts *)aom_calloc(1, sizeof(MvCosts)));
   }
 
-  if (cpi->td.mb.dv_costs) {
-    aom_free(cpi->td.mb.dv_costs);
-    cpi->td.mb.dv_costs = NULL;
-  }
-  CHECK_MEM_ERROR(cm, cpi->td.mb.dv_costs,
-                  (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.mb.dv_costs)));
-
   av1_setup_shared_coeff_buffer(cm->seq_params, &cpi->td.shared_coeff_buf,
                                 cm->error);
   av1_setup_sms_tree(cpi, &cpi->td);
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 09acf1a..b95457a 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -1297,7 +1297,8 @@
         memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
                sizeof(MvCosts));
       }
-      if (cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
+      if ((cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) &&
+          av1_need_dv_costs(cpi)) {
         CHECK_MEM_ERROR(cm, thread_data->td->mb.dv_costs,
                         (IntraBCMVCosts *)aom_malloc(sizeof(IntraBCMVCosts)));
         memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs,
@@ -1375,12 +1376,6 @@
         memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
                sizeof(MvCosts));
       }
-      if (cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
-        CHECK_MEM_ERROR(cm, thread_data->td->mb.dv_costs,
-                        (IntraBCMVCosts *)aom_malloc(sizeof(IntraBCMVCosts)));
-        memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs,
-               sizeof(IntraBCMVCosts));
-      }
     }
 
     av1_alloc_mb_data(cm, &thread_data->td->mb,
@@ -1662,9 +1657,7 @@
       if (cpi->sf.inter_sf.mv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
         aom_free(thread_data->td->mb.mv_costs);
       }
-      if (cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
-        aom_free(thread_data->td->mb.dv_costs);
-      }
+      assert(!thread_data->td->mb.dv_costs);
     }
     av1_dealloc_mb_data(cm, &thread_data->td->mb);
   }
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 7ada9b9..17c7960 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -749,8 +749,12 @@
     av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
 
   // Frame level dv cost update
-  if (!use_nonrd_pick_mode && av1_allow_intrabc(cm) &&
-      !is_stat_generation_stage(cpi)) {
+  if (av1_need_dv_costs(cpi)) {
+    if (cpi->td.mb.dv_costs == NULL) {
+      CHECK_MEM_ERROR(
+          cm, cpi->td.mb.dv_costs,
+          (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.mb.dv_costs)));
+    }
     av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
   }
 }