Prune HORZ4/VERT4 based on HORZ/VERT of split
This patch adds a speed feature prune_4_partition_using_split_info
to prune HORZ4/VERT4 partitions based on HORZ/VERT winner
info from split partitions.
This speed feature is enabled for cpu-used >= 3, except when screen
content tools are allowed.
            Encode Time    Quality loss
cpu-used    Reduction      avg.psnr    ovr.psnr    ssim
   3        2.348%         0.0583%     0.0653%     0.0711%
   4        0.772%         0.0383%     0.0311%     0.0221%
Change-Id: I03e55a7461727cb2de8e2aaa4309bea172cb0736
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 687d688..c1bbb7c 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2584,6 +2584,12 @@
}
}
+// Structure to keep win flags for HORZ and VERT partition evaluations
+typedef struct {
+ bool horz_win;  // Set to false on the losing path of the PARTITION_HORZ search
+ bool vert_win;  // Set to false on the losing path of the PARTITION_VERT search
+} RD_RECT_PART_WIN_INFO;
+
// TODO(jinging,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
@@ -2593,7 +2599,8 @@
BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part,
RD_STATS *rd_cost, RD_STATS best_rdc,
PC_TREE *pc_tree, int64_t *none_rd,
- SB_MULTI_PASS_MODE multi_pass_mode) {
+ SB_MULTI_PASS_MODE multi_pass_mode,
+ RD_RECT_PART_WIN_INFO *rect_part_win_info) {
const AV1_COMMON *const cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
TileInfo *const tile_info = &tile_data->tile_info;
@@ -2626,6 +2633,10 @@
int horz_ctx_is_ready = 0;
int vert_ctx_is_ready = 0;
BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
+ // Initialise HORZ and VERT win flags as true for all split partitions
+ RD_RECT_PART_WIN_INFO split_part_rect_win[4] = {
+ { true, true }, { true, true }, { true, true }, { true, true }
+ };
bool found_best_partition = false;
if (best_rdc.rdcost < 0) {
@@ -2977,7 +2988,8 @@
if (!rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
mi_col + x_idx, subsize, max_sq_part, min_sq_part,
&this_rdc, best_remain_rdcost, pc_tree->split[idx],
- p_split_rd, multi_pass_mode)) {
+ p_split_rd, multi_pass_mode,
+ &split_part_rect_win[idx])) {
av1_invalid_rd_stats(&sum_rdc);
break;
}
@@ -3128,6 +3140,11 @@
found_best_partition = true;
pc_tree->partitioning = PARTITION_HORZ;
}
+ } else {
+ // Update HORZ win flag
+ if (rect_part_win_info != NULL) {
+ rect_part_win_info->horz_win = false;
+ }
}
restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
@@ -3210,6 +3227,11 @@
best_rdc = sum_rdc;
found_best_partition = true;
pc_tree->partitioning = PARTITION_VERT;
+ } else {
+ // Update VERT win flag
+ if (rect_part_win_info != NULL) {
+ rect_part_win_info->vert_win = false;
+ }
}
restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
@@ -3539,6 +3561,27 @@
partition_vert4_allowed = 0;
}
+ if (cpi->sf.part_sf.prune_4_partition_using_split_info &&
+ (partition_horz4_allowed || partition_vert4_allowed)) {
+ // Count of child blocks in which HORZ or VERT partition has won
+ int num_child_horz_win = 0, num_child_vert_win = 0;
+ for (int idx = 0; idx < 4; idx++) {
+ num_child_horz_win += (split_part_rect_win[idx].horz_win) ? 1 : 0;
+ num_child_vert_win += (split_part_rect_win[idx].vert_win) ? 1 : 0;
+ }
+
+ // Prune HORZ4/VERT4 partitions based on number of HORZ/VERT winners of
+ // split partitions.
+ // Conservative pruning for high quantizers
+ const int num_win_thresh = 3 * (MAXQ - x->qindex) / MAXQ + 1;
+ if (num_child_horz_win < num_win_thresh) {
+ partition_horz4_allowed = 0;
+ }
+ if (num_child_vert_win < num_win_thresh) {
+ partition_vert4_allowed = 0;
+ }
+ }
+
// PARTITION_HORZ_4
assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz4_allowed));
if (!terminate_partition_search && partition_horz4_allowed && has_rows &&
@@ -4578,14 +4621,14 @@
if (num_passes == 1) {
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
- pc_root, NULL, SB_SINGLE_PASS);
+ pc_root, NULL, SB_SINGLE_PASS, NULL);
} else {
// First pass
SB_FIRST_PASS_STATS sb_fp_stats;
backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
- pc_root, NULL, SB_DRY_PASS);
+ pc_root, NULL, SB_DRY_PASS, NULL);
// Second pass
init_encode_rd_sb(cpi, td, tile_data, pc_root, &dummy_rdc, mi_row, mi_col,
@@ -4597,7 +4640,7 @@
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
max_sq_size, min_sq_size, &dummy_rdc, dummy_rdc,
- pc_root, NULL, SB_WET_PASS);
+ pc_root, NULL, SB_WET_PASS, NULL);
}
#if CONFIG_COLLECT_COMPONENT_TIMING
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index d5c9bc9..6b0e1a6 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -437,6 +437,8 @@
sf->part_sf.less_rectangular_check_level = 2;
sf->part_sf.simple_motion_search_prune_agg = 1;
+ sf->part_sf.prune_4_partition_using_split_info =
+ cm->allow_screen_content_tools ? 0 : 1;
// adaptive_motion_search breaks encoder multi-thread tests.
// The values in x->pred_mv[] differ for single and multi-thread cases.
@@ -931,6 +933,7 @@
part_sf->simple_motion_search_early_term_none = 0;
part_sf->intra_cnn_split = 0;
part_sf->ext_partition_eval_thresh = BLOCK_8X8;
+ part_sf->prune_4_partition_using_split_info = 0;
}
static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index bb5cb6d..d610383 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -428,6 +428,9 @@
// Disable extended partition search for lower block sizes.
int ext_partition_eval_thresh;
+
+ // Prune 1:4 partition search based on winner info from split partitions
+ int prune_4_partition_using_split_info;
} PARTITION_SPEED_FEATURES;
typedef struct MV_SPEED_FEATURES {