Reset hash logic before winner mode processing

The tx/mb rd hash records are reset before winner mode evaluation so
that results cached during earlier evaluations are not reused.

cpu-used    Instruction count reduction    BD rate impact
   3                  0.0%                     -0.07%
   4                  0.3%                     -0.01%

STATS_CHANGED

Change-Id: Icd4537eac3e100b00d8fd29bcaef992d9a6e0d52
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 1c8de01..b50af0f 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -282,6 +282,35 @@
   return coeff_opt_thresh;
 }
 
+// Resets the tx/mb rd hash records of x (num and index_start set to 0)
+static INLINE void reset_hash_records(MACROBLOCK *const x) {
+  int32_t record_idx;
+
+  // Reset the state for use_inter_txb_hash
+  for (record_idx = 0; record_idx < ((MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1));
+       record_idx++)
+    x->txb_rd_record_8X8[record_idx].num =
+        x->txb_rd_record_8X8[record_idx].index_start = 0;
+  for (record_idx = 0; record_idx < ((MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2));
+       record_idx++)
+    x->txb_rd_record_16X16[record_idx].num =
+        x->txb_rd_record_16X16[record_idx].index_start = 0;
+  for (record_idx = 0; record_idx < ((MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3));
+       record_idx++)
+    x->txb_rd_record_32X32[record_idx].num =
+        x->txb_rd_record_32X32[record_idx].index_start = 0;
+  for (record_idx = 0; record_idx < ((MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4));
+       record_idx++)
+    x->txb_rd_record_64X64[record_idx].num =
+        x->txb_rd_record_64X64[record_idx].index_start = 0;
+
+  // Reset the state for use_intra_txb_hash
+  x->txb_rd_record_intra.num = x->txb_rd_record_intra.index_start = 0;
+
+  // Reset the state for use_mb_rd_hash
+  x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
+}
+
 void av1_setup_pred_block(const MACROBLOCKD *xd,
                           struct buf_2d dst[MAX_MB_PLANE],
                           const YV12_BUFFER_CONFIG *src,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index e1d92d4..00f0a3e 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -11732,81 +11732,83 @@
   MB_MODE_INFO *const mbmi = xd->mi[0];
   const int num_planes = av1_num_planes(cm);
 
-  if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index != THR_INVALID &&
-      is_winner_mode_processing_enabled(cpi, mbmi, best_mbmode->mode)) {
-    int skip_blk = 0;
-    RD_STATS rd_stats_y, rd_stats_uv;
-    const int skip_ctx = av1_get_skip_context(xd);
-
+  if (is_winner_mode_processing_enabled(cpi, mbmi, best_mbmode->mode)) {
     // Set params for winner mode evaluation
     set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
 
-    *mbmi = *best_mbmode;
+    if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index != THR_INVALID) {
+      int skip_blk = 0;
+      RD_STATS rd_stats_y, rd_stats_uv;
+      const int skip_ctx = av1_get_skip_context(xd);
 
-    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+      *mbmi = *best_mbmode;
 
-    // Select prediction reference frames.
-    for (int i = 0; i < num_planes; i++) {
-      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
-      if (has_second_ref(mbmi))
-        xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
-    }
+      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
 
-    if (is_inter_mode(mbmi->mode)) {
-      const int mi_row = xd->mi_row;
-      const int mi_col = xd->mi_col;
-      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
-                                    av1_num_planes(cm) - 1);
-      if (mbmi->motion_mode == OBMC_CAUSAL)
-        av1_build_obmc_inter_predictors_sb(cm, xd);
+      // Select prediction reference frames.
+      for (int i = 0; i < num_planes; i++) {
+        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
+        if (has_second_ref(mbmi))
+          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
+      }
 
-      av1_subtract_plane(x, bsize, 0);
-      if (x->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
-        pick_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
-        assert(rd_stats_y.rate != INT_MAX);
+      if (is_inter_mode(mbmi->mode)) {
+        const int mi_row = xd->mi_row;
+        const int mi_col = xd->mi_col;
+        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
+                                      av1_num_planes(cm) - 1);
+        if (mbmi->motion_mode == OBMC_CAUSAL)
+          av1_build_obmc_inter_predictors_sb(cm, xd);
+
+        av1_subtract_plane(x, bsize, 0);
+        if (x->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
+          pick_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+          assert(rd_stats_y.rate != INT_MAX);
+        } else {
+          super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
+          memset(mbmi->inter_tx_size, mbmi->tx_size,
+                 sizeof(mbmi->inter_tx_size));
+          for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
+            set_blk_skip(x, 0, i, rd_stats_y.skip);
+        }
       } else {
         super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
-        memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
-        for (int i = 0; i < xd->n4_h * xd->n4_w; ++i)
-          set_blk_skip(x, 0, i, rd_stats_y.skip);
       }
-    } else {
-      super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
-    }
 
-    if (num_planes > 1) {
-      super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
-    } else {
-      av1_init_rd_stats(&rd_stats_uv);
-    }
+      if (num_planes > 1) {
+        super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
+      } else {
+        av1_init_rd_stats(&rd_stats_uv);
+      }
 
-    if (RDCOST(x->rdmult,
-               x->skip_cost[skip_ctx][0] + rd_stats_y.rate + rd_stats_uv.rate,
-               (rd_stats_y.dist + rd_stats_uv.dist)) >
-        RDCOST(x->rdmult, x->skip_cost[skip_ctx][1],
-               (rd_stats_y.sse + rd_stats_uv.sse))) {
-      skip_blk = 1;
-      rd_stats_y.rate = x->skip_cost[skip_ctx][1];
-      rd_stats_uv.rate = 0;
-      rd_stats_y.dist = rd_stats_y.sse;
-      rd_stats_uv.dist = rd_stats_uv.sse;
-    } else {
-      skip_blk = 0;
-      rd_stats_y.rate += x->skip_cost[skip_ctx][0];
-    }
+      if (RDCOST(x->rdmult,
+                 x->skip_cost[skip_ctx][0] + rd_stats_y.rate + rd_stats_uv.rate,
+                 (rd_stats_y.dist + rd_stats_uv.dist)) >
+          RDCOST(x->rdmult, x->skip_cost[skip_ctx][1],
+                 (rd_stats_y.sse + rd_stats_uv.sse))) {
+        skip_blk = 1;
+        rd_stats_y.rate = x->skip_cost[skip_ctx][1];
+        rd_stats_uv.rate = 0;
+        rd_stats_y.dist = rd_stats_y.sse;
+        rd_stats_uv.dist = rd_stats_uv.sse;
+      } else {
+        skip_blk = 0;
+        rd_stats_y.rate += x->skip_cost[skip_ctx][0];
+      }
 
-    if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
-        RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
-               (rd_stats_y.dist + rd_stats_uv.dist))) {
-      best_mbmode->tx_size = mbmi->tx_size;
-      av1_copy(best_mbmode->inter_tx_size, mbmi->inter_tx_size);
-      av1_copy_array(ctx->blk_skip, x->blk_skip, ctx->num_4x4_blk);
-      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
-      rd_cost->rate +=
-          (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
-      rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
-      rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
-      *best_skip2 = skip_blk;
+      if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
+          RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
+                 (rd_stats_y.dist + rd_stats_uv.dist))) {
+        best_mbmode->tx_size = mbmi->tx_size;
+        av1_copy(best_mbmode->inter_tx_size, mbmi->inter_tx_size);
+        av1_copy_array(ctx->blk_skip, x->blk_skip, ctx->num_4x4_blk);
+        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
+        rd_cost->rate +=
+            (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
+        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
+        rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
+        *best_skip2 = skip_blk;
+      }
     }
   }
 }
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index add4e41..b371c95 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -374,6 +374,14 @@
       // Set default transform type prune mode for winner mode evaluation
       set_tx_type_prune(
           sf, x, sf->tx_type_search.enable_winner_mode_tx_type_pruning, 1);
+
+      // Reset hash state for winner mode processing. Winner mode and subsequent
+      // transform/mode evaluations (palette/IntraBC) cannot reuse old data as
+      // the decisions would have been sub-optimal
+      // TODO(any): Move the evaluation of palette/IntraBC modes before winner
+      // mode is processed and clean-up the code below
+      reset_hash_records(x);
+
       break;
     default: assert(0);
   }
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 6596384..38cf4a7 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -396,8 +396,6 @@
     // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
     // and clean-up the speed feature
     sf->perform_best_rd_based_gating_for_chroma = 1;
-    // TODO(any): Experiment on the dependency of this speed feature with
-    // use_intra_txb_hash, use_inter_txb_hash and use_mb_rd_hash speed features
     // TODO(any): Refactor the code related to following winner mode speed
     // features
     sf->enable_winner_mode_for_coeff_opt = 1;
@@ -421,6 +419,8 @@
 
   if (speed >= 4) {
     sf->selective_ref_frame = 4;
+    // TODO(any): Experiment with enabling this speed feature now that the hash
+    // state is reset during winner mode processing
     sf->use_intra_txb_hash = 0;
     sf->tx_type_search.fast_intra_tx_type_search = 1;
     sf->disable_loop_restoration_chroma =