Create a function to zero out WinnerModeStats.

color_index_map in WinnerModeStats is a 16k array that is
zeroed for every block. Only the first block_width * block_height
entries need to be zeroed.
This saves several percent of CPU time for lossless encoding.

Change-Id: Ided22f7ebc46d2fc5bd36e6b64ddede8fb1189e8
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index e6bf9a5..d10d2cb 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -1260,6 +1260,27 @@
 #undef SINGLE_REF_MODES
 
 /*!\cond */
+// Zeroes the members listed below in the first 'n_stats' elements of 'stats'.
+// For 'color_index_map', only the leading block_width * block_height entries
+// (the dimensions looked up for 'bsize') are cleared, not the whole array.
+static INLINE void zero_winner_mode_stats(BLOCK_SIZE bsize, int n_stats,
+                                          WinnerModeStats *stats) {
+  const int block_height = block_size_high[bsize];
+  const int block_width = block_size_wide[bsize];
+  for (int i = 0; i < n_stats; ++i) {
+    WinnerModeStats *const stat = &stats[i];
+    memset(&stat->mbmi, 0, sizeof(stat->mbmi));
+    memset(&stat->rd_cost, 0, sizeof(stat->rd_cost));
+    memset(&stat->rd, 0, sizeof(stat->rd));
+    memset(&stat->rate_y, 0, sizeof(stat->rate_y));
+    memset(&stat->rate_uv, 0, sizeof(stat->rate_uv));
+    // Clear only width * height entries; resetting the whole array is costly.
+    memset(&stat->color_index_map, 0,
+           block_width * block_height * sizeof(stat->color_index_map[0]));
+    memset(&stat->mode_index, 0, sizeof(stat->mode_index));
+  }
+}
+
 static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
   static const char LUT[BLOCK_SIZES_ALL] = {
     0,  // BLOCK_4X4
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index c33733e..8c9f287 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -1132,7 +1132,8 @@
   set_mode_eval_params(cpi, x, MODE_EVAL);
 
   MB_MODE_INFO best_mbmi = *mbmi;
-  av1_zero_array(x->winner_mode_stats, MAX_WINNER_MODE_COUNT_INTRA);
+  zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTRA,
+                         x->winner_mode_stats);
   x->winner_mode_count = 0;
 
   // Searches the intra-modes except for intrabc, palette, and filter_intra.
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index ef82af3..320063e 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5443,7 +5443,8 @@
 #endif  // !CONFIG_REALTIME_ONLY
 
   // Initialize best mode stats for winner mode processing
-  av1_zero_array(x->winner_mode_stats, MAX_WINNER_MODE_COUNT_INTER);
+  zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTER,
+                         x->winner_mode_stats);
   x->winner_mode_count = 0;
   store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                           NULL, bsize, best_rd_so_far,