Add a speed feature to limit ref frame search
Limit the ref frame candidates to those picked during the initial
partition search (the two_pass_partition_search speed feature).
Compression quality impact is neutral.
Encoding speed improvement on top of speed 1 (30 frames):
                  QP=20    QP=40
akiyo_cif:        15%      22%
cheer_cif:        2%       1%
city_cif:         10%      14%
coastguard_cif:   8%       16%
container_cif:    6.5%     17.5%
crew_cif:         8.5%     11.5%
AVERAGE:          8.4%     13.8%
Enabled for speed 1 and above.
STATS_CHANGED
Change-Id: I5f43fb7c4e9932240f8d163bad663e8249952fa3
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 9ae6366..643861f 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -154,6 +154,10 @@
// cost in the first pass search.
int cb_partition_scan;
+ // If 0, do not allow corresponding ref frame during RD search.
+ uint8_t ref0_candidate_mask[REF_FRAMES + 1]; // The last entry is a counter.
+ uint8_t ref1_candidate_mask[REF_FRAMES];
+
// Activate constrained coding block partition search range.
int use_cb_search_range;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index ecba645..f403792 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3536,11 +3536,17 @@
reset_partition(pc_root, cm->seq_params.sb_size);
x->use_cb_search_range = 0;
+ memset(x->ref0_candidate_mask, 1, sizeof(x->ref0_candidate_mask));
+ memset(x->ref1_candidate_mask, 1, sizeof(x->ref1_candidate_mask));
if (cpi->sf.two_pass_partition_search &&
mi_row + mi_size_high[cm->seq_params.sb_size] < cm->mi_rows &&
mi_col + mi_size_wide[cm->seq_params.sb_size] < cm->mi_cols &&
cm->frame_type != KEY_FRAME) {
x->cb_partition_scan = 1;
+ if (sf->mode_pruning_based_on_two_pass_partition_search) {
+ av1_zero(x->ref0_candidate_mask);
+ av1_zero(x->ref1_candidate_mask);
+ }
rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col,
cm->seq_params.sb_size, &dummy_rdc, INT64_MAX,
pc_root, NULL);
@@ -3573,6 +3579,22 @@
}
x->use_cb_search_range = 1;
+
+ if (sf->mode_pruning_based_on_two_pass_partition_search) {
+ if (x->ref0_candidate_mask[REF_FRAMES] < 16) {
+ // If there are not enough samples recorded, make all available.
+ memset(x->ref0_candidate_mask, 1, sizeof(x->ref0_candidate_mask));
+ memset(x->ref1_candidate_mask, 1, sizeof(x->ref1_candidate_mask));
+ } else if (sf->selective_ref_frame < 2) {
+ // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the initial
+ // partition scan, so we don't eliminate them.
+ x->ref0_candidate_mask[ALTREF2_FRAME] = 1;
+ x->ref1_candidate_mask[ALTREF2_FRAME] = 1;
+ x->ref0_candidate_mask[BWDREF_FRAME] = 1;
+ x->ref1_candidate_mask[BWDREF_FRAME] = 1;
+ }
+ }
+
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
cm->seq_params.sb_size, &dummy_rdc, INT64_MAX,
pc_root, NULL);
@@ -4632,6 +4654,15 @@
const int mi_height = mi_size_high[bsize];
const int is_inter = is_inter_block(mbmi);
+ if (cpi->sf.mode_pruning_based_on_two_pass_partition_search &&
+ x->cb_partition_scan) {
+ // Increase the counter of data samples.
+ ++x->ref0_candidate_mask[REF_FRAMES];
+ // Record that ref_frame[0] and ref_frame[1] are picked.
+ x->ref0_candidate_mask[mbmi->ref_frame[0]] = 1;
+ if (mbmi->ref_frame[1] >= 0) x->ref1_candidate_mask[mbmi->ref_frame[1]] = 1;
+ }
+
if (!is_inter) {
xd->cfl.is_chroma_reference = is_chroma_reference(
mi_row, mi_col, bsize, cm->subsampling_x, cm->subsampling_y);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 47ba46c..db66b37 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -8993,6 +8993,13 @@
const MV_REFERENCE_FRAME *ref_frame = av1_mode_order[mode_index].ref_frame;
const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;
+ if (cpi->sf.mode_pruning_based_on_two_pass_partition_search &&
+ !x->cb_partition_scan) {
+ if (!x->ref0_candidate_mask[ref_frame[0]] ||
+ (ref_frame[1] >= 0 && !x->ref1_candidate_mask[ref_frame[1]]))
+ return 1;
+ }
+
if (ref_frame[0] > INTRA_FRAME && ref_frame[1] == INTRA_FRAME) {
// Mode must by compatible
if (!is_interintra_allowed_mode(this_mode)) return 1;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index a3cb7a0..c573ecb 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -148,6 +148,7 @@
sf->tx_size_search_init_depth_sqr = 1;
sf->tx_size_search_lgr_block = 1;
sf->two_pass_partition_search = 1;
+ sf->mode_pruning_based_on_two_pass_partition_search = 1;
sf->prune_ext_partition_types_search = 1;
sf->use_fast_interpolation_filter_search = 1;
sf->tx_type_search.skip_tx_search = 1;
@@ -445,6 +446,7 @@
sf->txb_split_cap = 1;
sf->adaptive_txb_search = 0;
sf->two_pass_partition_search = 0;
+ sf->mode_pruning_based_on_two_pass_partition_search = 0;
sf->use_intra_txb_hash = 0;
sf->use_inter_txb_hash = 1;
sf->use_mb_rd_hash = 1;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index d6edab9..4e38709 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -383,6 +383,10 @@
// 2-pass coding block partition search
int two_pass_partition_search;
+ // Use the mode decisions made in the initial partition search to prune mode
+ // candidates, e.g. ref frames.
+ int mode_pruning_based_on_two_pass_partition_search;
+
// Skip rectangular partition test when partition type none gives better
// rd than partition type split.
int less_rectangular_check;