AV1 RT: Don't check leaf split if merged block can be skipped
~20% speedup at speed 7 on lowres, with 0.5% overall BDRate degradation
and 1.1% max BDRate degradation. On midres the max degradation is 4%, so
this is turned off for 480p and higher.
Change-Id: I1d97ca9ece8842f71d334d028d2b0b1b00ccda04
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 9f1e0c7..a46d228 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2316,12 +2316,12 @@
}
break;
case PARTITION_SPLIT:
- if (cpi->sf.rt_sf.nonrd_check_partition_merge &&
+ if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode &&
is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
!frame_is_intra_only(cm)) {
RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
RD_STATS split_rdc, none_rdc;
- av1_init_rd_stats(&split_rdc);
+ av1_invalid_rd_stats(&split_rdc);
av1_invalid_rd_stats(&none_rdc);
save_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
xd->above_txfm_context =
@@ -2335,32 +2335,36 @@
none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
+ if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode != 2 ||
+ none_rdc.skip != 1 || pc_tree->none.mic.mode == NEWMV) {
+ av1_init_rd_stats(&split_rdc);
+ for (int i = 0; i < 4; i++) {
+ RD_STATS block_rdc;
+ av1_invalid_rd_stats(&block_rdc);
+ int x_idx = (i & 1) * hbs;
+ int y_idx = (i >> 1) * hbs;
+ if ((mi_row + y_idx >= cm->mi_rows) ||
+ (mi_col + x_idx >= cm->mi_cols))
+ continue;
+ xd->above_txfm_context =
+ cm->above_txfm_context[tile_info->tile_row] + mi_col + x_idx;
+ xd->left_txfm_context = xd->left_txfm_context_buffer +
+ ((mi_row + y_idx) & MAX_MIB_MASK);
+ pc_tree->split[i]->partitioning = PARTITION_NONE;
+ pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
+ &block_rdc, PARTITION_NONE, subsize,
+ &pc_tree->split[i]->none, invalid_rd,
+ PICK_MODE_NONRD);
+ split_rdc.rate += block_rdc.rate;
+ split_rdc.dist += block_rdc.dist;
- for (int i = 0; i < 4; i++) {
- RD_STATS block_rdc;
- av1_invalid_rd_stats(&block_rdc);
- int x_idx = (i & 1) * hbs;
- int y_idx = (i >> 1) * hbs;
- if ((mi_row + y_idx >= cm->mi_rows) ||
- (mi_col + x_idx >= cm->mi_cols))
- continue;
- xd->above_txfm_context =
- cm->above_txfm_context[tile_info->tile_row] + mi_col + x_idx;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + ((mi_row + y_idx) & MAX_MIB_MASK);
- pc_tree->split[i]->partitioning = PARTITION_NONE;
- pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
- &block_rdc, PARTITION_NONE, subsize,
- &pc_tree->split[i]->none, invalid_rd, PICK_MODE_NONRD);
- split_rdc.rate += block_rdc.rate;
- split_rdc.dist += block_rdc.dist;
-
- encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
- subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
+ encode_b(cpi, tile_data, td, tp, mi_row + y_idx, mi_col + x_idx, 1,
+ subsize, PARTITION_NONE, &pc_tree->split[i]->none, NULL);
+ }
+ restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
+ split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
+ split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
}
- restore_context(x, &x_ctx, mi_row, mi_col, bsize, 3);
- split_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
- split_rdc.rdcost = RDCOST(x->rdmult, split_rdc.rate, split_rdc.dist);
if (none_rdc.rdcost < split_rdc.rdcost) {
mib[0]->sb_type = bsize;
pc_tree->partitioning = PARTITION_NONE;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 963796c..6230c17 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -257,6 +257,9 @@
}
}
if (!is_480p_or_larger) {
+ if (speed == 7) {
+ sf->rt_sf.nonrd_check_partition_merge_mode = 2;
+ }
if (speed >= 8) {
sf->mv_sf.subpel_search_method = SUBPEL_TREE;
@@ -835,7 +838,7 @@
sf->rt_sf.use_comp_ref_nonrd = 0;
sf->rt_sf.use_nonrd_altref_frame = 1;
sf->rt_sf.use_nonrd_pick_mode = 1;
- sf->rt_sf.nonrd_check_partition_merge = 1;
+ sf->rt_sf.nonrd_check_partition_merge_mode = 1;
sf->rt_sf.nonrd_check_partition_split = 0;
sf->rt_sf.hybrid_intra_pickmode = 1;
}
@@ -847,7 +850,7 @@
sf->rt_sf.nonrd_use_blockyrd_interp_filter = 0;
sf->rt_sf.use_nonrd_altref_frame = 0;
sf->rt_sf.nonrd_reduce_golden_mode_search = 1;
- sf->rt_sf.nonrd_check_partition_merge = 0;
+ sf->rt_sf.nonrd_check_partition_merge_mode = 0;
sf->rt_sf.nonrd_check_partition_split = 0;
// TODO(kyslov) Enable when better model is available
@@ -1086,7 +1089,7 @@
rt_sf->force_tx_search_off = 0;
rt_sf->num_inter_modes_for_tx_search = INT_MAX;
rt_sf->use_simple_rd_model = 0;
- rt_sf->nonrd_check_partition_merge = 0;
+ rt_sf->nonrd_check_partition_merge_mode = 0;
rt_sf->nonrd_check_partition_split = 0;
}
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 5ae0308..abac1a8 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -864,9 +864,13 @@
// Perform coarse ME before calculating variance in variance-based partition
int estimate_motion_for_var_based_partition;
- // For nonrd_use_partition: perform extra check of leaf partition split and
- // merge
- int nonrd_check_partition_merge;
+ // For nonrd_use_partition: mode of the extra leaf partition merge check
+ // 0 - don't check merge
+ // 1 - always check merge
+ // 2 - check merge; skip split check if merged block can be skipped
+ int nonrd_check_partition_merge_mode;
+
+ // For nonrd_use_partition: check of leaf partition extra split
int nonrd_check_partition_split;
// Implements various heuristics to skip searching modes