Add support for multiple winner mode processing for key frames
- Introduced a speed feature to support multiple winner mode processing for
key frames
- Enabled winner mode tx size search speed feature for key frames
Both changes apply at speed 4 and above.
Results:
cpu-used | Instruction count reduction | Quality loss
4        | 1.16%                       | -0.04%
STATS_CHANGED
Change-Id: If3fd8d87c7f723c7b456477b26dbfd95b27c7b1f
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 6a57768..d46d28c 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -35,6 +35,12 @@
#define MC_FLOW_BSIZE_1D 16
#define MC_FLOW_NUM_PELS (MC_FLOW_BSIZE_1D * MC_FLOW_BSIZE_1D)
#define MAX_MC_FLOW_BLK_IN_SB (MAX_SB_SIZE / MC_FLOW_BSIZE_1D)
+#define MAX_WINNER_MODE_COUNT 3
+typedef struct {
+ MB_MODE_INFO mbmi;  // Mode info of the stored candidate mode
+ int64_t rd;  // RD cost the candidate was stored with
+ uint8_t color_index_map[64 * 64];  // Copy of the palette color index map
+} WinnerModeStats;
typedef struct {
unsigned int sse;
@@ -241,6 +247,9 @@
MACROBLOCKD e_mbd;
MB_MODE_INFO_EXT *mbmi_ext;
MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame;
+ // Array of mode stats for winner mode processing
+ WinnerModeStats winner_mode_stats[MAX_WINNER_MODE_COUNT];
+ int winner_mode_count;
int skip_block;
int qindex;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fe2fdd2..02c9abb 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -4220,6 +4220,51 @@
}
}
+// Insert a mode into x->winner_mode_stats, kept sorted by ascending rd cost
+static void store_winner_mode_stats(MACROBLOCK *x, MB_MODE_INFO *mbmi,
+ int enable_multiwinner_mode_process,
+ uint8_t *color_map, BLOCK_SIZE bsize,
+ int64_t this_rd) {
+ WinnerModeStats *winner_mode_stats = x->winner_mode_stats;
+ int mode_idx = 0;
+ // Nothing to record when multi-winner mode processing is disabled
+ if (!enable_multiwinner_mode_process) return;
+
+ assert(x->winner_mode_count >= 0 &&
+ x->winner_mode_count <= MAX_WINNER_MODE_COUNT);
+
+ if (x->winner_mode_count) {
+ // Find the first stored mode whose rd cost is higher than this_rd
+ for (mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++)
+ if (winner_mode_stats[mode_idx].rd > this_rd) break;
+
+ if (mode_idx == MAX_WINNER_MODE_COUNT) {
+ // List is full and no stored mode has higher rd cost than this_rd
+ return;
+ } else if (mode_idx < MAX_WINNER_MODE_COUNT - 1) {
+ // Shift entries from mode_idx on by one slot; the last slot is overwritten
+ memmove(
+ &winner_mode_stats[mode_idx + 1], &winner_mode_stats[mode_idx],
+ (MAX_WINNER_MODE_COUNT - mode_idx - 1) * sizeof(*winner_mode_stats));
+ }
+ }
+ // Record this mode's info and rd cost in the slot at mode_idx
+ winner_mode_stats[mode_idx].mbmi = *mbmi;
+ winner_mode_stats[mode_idx].rd = this_rd;
+ if (color_map) {
+ // Palette mode: also keep a copy of the luma color index map
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ int block_width, block_height;
+ av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width,
+ &block_height, NULL, NULL);
+ memcpy(winner_mode_stats[mode_idx].color_index_map, color_map,
+ block_width * block_height * sizeof(color_map[0]));
+ }
+
+ x->winner_mode_count =
+ AOMMIN(x->winner_mode_count + 1, MAX_WINNER_MODE_COUNT);
+}
+
// Given the base colors as specified in centroids[], calculate the RD cost
// of palette mode.
static AOM_INLINE void palette_rd_y(
@@ -4269,6 +4314,9 @@
if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
tokenonly_rd_stats.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
}
+ // Collect mode stats for multiwinner mode processing
+ store_winner_mode_stats(x, mbmi, cpi->sf.enable_multiwinner_mode_process,
+ color_map, bsize, this_rd);
if (this_rd < *best_rd) {
*best_rd = this_rd;
// Setting beat_best_rd flag because current mode rd is better than best_rd.
@@ -4449,6 +4497,9 @@
intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
+ // Collect mode stats for multiwinner mode processing
+ store_winner_mode_stats(x, mbmi, cpi->sf.enable_multiwinner_mode_process,
+ NULL, bsize, this_rd);
if (this_rd < *best_rd) {
*best_rd = this_rd;
best_tx_size = mbmi->tx_size;
@@ -4708,19 +4759,19 @@
}
// Given selected prediction mode, search for the best tx type and size.
-static AOM_INLINE void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, const int *bmode_costs,
- int64_t *best_rd, int *rate,
- int *rate_tokenonly, int64_t *distortion,
- int *skippable, MB_MODE_INFO *best_mbmi,
- PICK_MODE_CONTEXT *ctx) {
+static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, const int *bmode_costs,
+ int64_t *best_rd, int *rate,
+ int *rate_tokenonly, int64_t *distortion,
+ int *skippable, MB_MODE_INFO *best_mbmi,
+ PICK_MODE_CONTEXT *ctx) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
RD_STATS rd_stats;
// In order to improve txfm search avoid rd based breakouts during winner
// mode evaluation. Hence passing ref_best_rd as a maximum value
super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
- if (rd_stats.rate == INT_MAX) return;
+ if (rd_stats.rate == INT_MAX) return 0;
int this_rate_tokenonly = rd_stats.rate;
if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
// super_block_yrd above includes the cost of the tx_size in the
@@ -4742,7 +4793,9 @@
*skippable = rd_stats.skip;
memcpy(ctx->blk_skip, x->blk_skip,
sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
+ return 1;
}
+ return 0;
}
// This function is used only for intra_only frames
@@ -4791,6 +4844,10 @@
set_mode_eval_params(cpi, x, MODE_EVAL);
MB_MODE_INFO best_mbmi = *mbmi;
+ x->winner_mode_count = 0;
+ // Initialize best mode stats for winner mode processing
+ store_winner_mode_stats(x, mbmi, cpi->sf.enable_multiwinner_mode_process,
+ NULL, bsize, best_rd);
/* Y Search for intra prediction mode */
for (int mode_idx = INTRA_MODE_START; mode_idx < INTRA_MODE_END; ++mode_idx) {
RD_STATS this_rd_stats;
@@ -4838,6 +4895,9 @@
this_rd_stats.rate +
intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
+ // Collect mode stats for multiwinner mode processing
+ store_winner_mode_stats(x, mbmi, cpi->sf.enable_multiwinner_mode_process,
+ NULL, bsize, this_rd);
if (this_rd < best_rd) {
best_mbmi = *mbmi;
best_rd = this_rd;
@@ -4871,17 +4931,59 @@
// function. In such cases winner mode processing is not necessary and return
// best_rd as INT64_MAX to indicate best mode is not identified
if (!beat_best_rd) return INT64_MAX;
- // If previous searches use only the default tx type/no R-D optimization of
- // quantized coeffs, do an extra search for the best tx type/better R-D
- // optimization of quantized coeffs
- if (is_winner_mode_processing_enabled(cpi, mbmi, best_mbmi.mode)) {
- // Set params for winner mode evaluation
- set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
- *mbmi = best_mbmi;
- intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, rate_tokenonly,
- distortion, skippable, &best_mbmi, ctx);
- }
+ // In multi-winner mode processing, perform tx search for few best modes
+ // identified during mode evaluation. Winner mode processing uses best tx
+ // configuration for tx search.
+ if (cpi->sf.enable_multiwinner_mode_process) {
+ int best_mode_idx = 0;
+ int block_width, block_height;
+ uint8_t *color_map_dst = xd->plane[PLANE_TYPE_Y].color_index_map;
+ av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width,
+ &block_height, NULL, NULL);
+
+ for (int mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++) {
+ *mbmi = x->winner_mode_stats[mode_idx].mbmi;
+ if (is_winner_mode_processing_enabled(cpi, mbmi, mbmi->mode)) {
+ // Restore color_map of palette mode before winner mode processing
+ if (mbmi->palette_mode_info.palette_size[0] > 0) {
+ uint8_t *color_map_src =
+ x->winner_mode_stats[mode_idx].color_index_map;
+ memcpy(color_map_dst, color_map_src,
+ block_width * block_height * sizeof(*color_map_src));
+ }
+ // Set params for winner mode evaluation
+ set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
+
+ // Winner mode processing
+ // If previous searches use only the default tx type/no R-D optimization
+ // of quantized coeffs, do an extra search for the best tx type/better
+ // R-D optimization of quantized coeffs
+ if (intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate,
+ rate_tokenonly, distortion, skippable, &best_mbmi,
+ ctx))
+ best_mode_idx = mode_idx;
+ }
+ }
+ // Copy color_map of palette mode for final winner mode
+ if (best_mbmi.palette_mode_info.palette_size[0] > 0) {
+ uint8_t *color_map_src =
+ x->winner_mode_stats[best_mode_idx].color_index_map;
+ memcpy(color_map_dst, color_map_src,
+ block_width * block_height * sizeof(*color_map_src));
+ }
+ } else {
+ // If previous searches use only the default tx type/no R-D optimization of
+ // quantized coeffs, do an extra search for the best tx type/better R-D
+ // optimization of quantized coeffs
+ if (is_winner_mode_processing_enabled(cpi, mbmi, best_mbmi.mode)) {
+ // Set params for winner mode evaluation
+ set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
+ *mbmi = best_mbmi;
+ intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate,
+ rate_tokenonly, distortion, skippable, &best_mbmi, ctx);
+ }
+ }
*mbmi = best_mbmi;
return best_rd;
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index c6c4369..196a29b 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -401,6 +401,10 @@
sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 4;
sf->adaptive_txb_search_level = boosted ? 2 : 3;
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
+ sf->enable_winner_mode_for_tx_size_srch = 1;
+ // TODO(any): Extend multi-winner mode processing support for inter frames
+ sf->enable_multiwinner_mode_process =
+ frame_is_intra_only(&cpi->common) ? 1 : 0;
// TODO(any): Experiment with this speed feature set to 2 for higher quality
// presets as well
sf->skip_intra_in_interframe = 2;
@@ -876,6 +880,7 @@
sf->enable_winner_mode_for_coeff_opt = 0;
sf->enable_winner_mode_for_tx_size_srch = 0;
sf->enable_winner_mode_for_use_tx_domain_dist = 0;
+ sf->enable_multiwinner_mode_process = 0;
sf->prune_comp_type_by_model_rd = 0;
sf->disable_smooth_intra = 0;
sf->perform_best_rd_based_gating_for_chroma = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 643f33a..0aa8369 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -695,6 +695,9 @@
// domain distortion
int enable_winner_mode_for_use_tx_domain_dist;
+ // Flag used to enable processing of multiple winner modes
+ int enable_multiwinner_mode_process;
+
// Flag used to control the speed of the eob selection in trellis.
int trellis_eob_fast;