Create a function to zero out WinnerModeStats.
color_index_map in WinnerModeStats is a 16k array that is
set to 0 per block. We only need to zero out the part the block covers.
This saves several percent of CPU time for lossless encoding.
Change-Id: Ided22f7ebc46d2fc5bd36e6b64ddede8fb1189e8
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index e6bf9a5..d10d2cb 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -1260,6 +1260,27 @@
#undef SINGLE_REF_MODES
/*!\cond */
+// Zeroes 'n_stats' entries of 'stats' field by field (NOTE(review): any field
+// not listed below stays as is). For 'color_index_map' only the first
+// block_width * block_height elements for 'bsize' are cleared, not all of it.
+static INLINE void zero_winner_mode_stats(BLOCK_SIZE bsize, int n_stats,
+ WinnerModeStats *stats) {
+ const int block_height = block_size_high[bsize];
+ const int block_width = block_size_wide[bsize];
+ for (int i = 0; i < n_stats; ++i) {
+ WinnerModeStats *const stat = &stats[i];
+ memset(&stat->mbmi, 0, sizeof(stat->mbmi));
+ memset(&stat->rd_cost, 0, sizeof(stat->rd_cost));
+ memset(&stat->rd, 0, sizeof(stat->rd));
+ memset(&stat->rate_y, 0, sizeof(stat->rate_y));
+ memset(&stat->rate_uv, 0, sizeof(stat->rate_uv));
+ // Clear only the block's share of the array; zeroing all of it is costly.
+ memset(&stat->color_index_map, 0,
+ block_width * block_height * sizeof(stat->color_index_map[0]));
+ memset(&stat->mode_index, 0, sizeof(stat->mode_index));
+ }
+}
+
static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
static const char LUT[BLOCK_SIZES_ALL] = {
0, // BLOCK_4X4
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index c33733e..8c9f287 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -1132,7 +1132,8 @@
set_mode_eval_params(cpi, x, MODE_EVAL);
MB_MODE_INFO best_mbmi = *mbmi;
- av1_zero_array(x->winner_mode_stats, MAX_WINNER_MODE_COUNT_INTRA);
+ zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTRA,
+ x->winner_mode_stats);
x->winner_mode_count = 0;
// Searches the intra-modes except for intrabc, palette, and filter_intra.
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index ef82af3..320063e 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5443,7 +5443,8 @@
#endif // !CONFIG_REALTIME_ONLY
// Initialize best mode stats for winner mode processing
- av1_zero_array(x->winner_mode_stats, MAX_WINNER_MODE_COUNT_INTER);
+ zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTER,
+ x->winner_mode_stats);
x->winner_mode_count = 0;
store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
NULL, bsize, best_rd_so_far,