Use better simple_motion_search_split on speed 3+
The current model can lead to a large PSNR drop on low-bitrate,
high-resolution videos. Using a higher-quality model solves this issue
and leads to some speed gain on lowres and midres.
Performance on speed 3:
TESTSET | AVG_PSNR | OVR_PSNR | SSIM | VMAF | AVG_SPD | OVR_SPD
LOWRES | +0.014% | +0.016% | +0.115% | +0.044% | +5.547% | +7.604%
MIDRES | -0.213% | -0.217% | -0.218% | -0.382% | +3.049% | +4.310%
HDRES | -0.685% | -0.669% | -0.543% | -1.331% | -0.368% | +0.041%
BUG=aomedia:2365
STATS_CHANGED
Change-Id: I798abc39ae35483fb0a15ffdabec1cf55058179f
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 8de19cc..926e30a 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2701,7 +2701,7 @@
// Use simple_motion_search to prune partitions. This must be done prior to
// PARTITION_SPLIT to propagate the initial mvs to a smaller blocksize.
const int try_split_only =
- cpi->sf.simple_motion_search_split_only && do_square_split &&
+ cpi->sf.simple_motion_search_split && do_square_split &&
bsize >= BLOCK_8X8 && mi_row + mi_size_high[bsize] <= cm->mi_rows &&
mi_col + mi_size_wide[bsize] <= cm->mi_cols && !frame_is_intra_only(cm) &&
!av1_superres_scaled(cm);
@@ -4244,7 +4244,7 @@
PC_TREE *const pc_root = td->pc_root[mib_size_log2 - MIN_MIB_SIZE_LOG2];
pc_root->index = 0;
- if ((sf->simple_motion_search_split_only ||
+ if ((sf->simple_motion_search_split ||
sf->simple_motion_search_prune_rect ||
sf->simple_motion_search_early_term_none ||
sf->firstpass_simple_motion_search_early_term) &&
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index f21f0d7..3d2a947 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -25,116 +25,7 @@
AV1_COMP *const cpi, MACROBLOCK *x, PC_TREE *pc_tree, int mi_row,
int mi_col, BLOCK_SIZE bsize, float *features, int features_to_get);
-// Performs a simple_motion_search with a single reference frame and extract
-// the variance of residues. Here features is assumed to be a length 6 array.
-// After this function is called, we will store the following in to features:
-// features[0] = log(1 + dc_q**2/256)
-// features[1] = log(1 + variance_of_residue)
-// for i in [2, 3, 4, 5]:
-// features[i] = log(1 + variance_of_residue_in_block[i]/variance_of_residue)
-static void get_res_var_features(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
- int mi_col, BLOCK_SIZE bsize,
- float *features) {
- // TODO(chiyotsai@google.com): The data this model trained on did not also use
- // SIMPLE_TRANSLATION to build the inter_predictor. Retraining and tuning the
- // model with the correct data should give better performance.
- assert(mi_size_wide[bsize] == mi_size_high[bsize]);
-
- MACROBLOCKD *xd = &x->e_mbd;
-
- // Perform a single motion search in Y_PLANE to make a prediction
- const int use_subpixel = 0;
-
- // Start getting the features
- int f_idx = 0;
-
- // Q_INDEX
- const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8);
- aom_clear_system_state();
- features[f_idx++] = logf(1.0f + (float)(dc_q * dc_q) / 256.0f);
-
- // VARIANCE
- unsigned int sse = 0;
- unsigned int var = 0;
- const MV ref_mv_full = { .row = 0, .col = 0 };
- av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, ref_mv_full,
- use_subpixel, &sse, &var);
- aom_clear_system_state();
- features[f_idx++] = logf(1.0f + (float)var);
-
- // Regional
- const uint8_t *src = x->plane[0].src.buf;
- const int src_stride = x->plane[0].src.stride;
- const uint8_t *dst = xd->plane[0].dst.buf;
- const int dst_stride = xd->plane[0].dst.stride;
- const int bw = block_size_wide[bsize];
- const int bh = block_size_high[bsize];
- const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
- int r_idx = 0;
- for (r_idx = 0; r_idx < 4; r_idx++) {
- const int x_idx = (r_idx & 1) * bw / 2;
- const int y_idx = (r_idx >> 1) * bh / 2;
- const int src_offset = y_idx * src_stride + x_idx;
- const int dst_offset = y_idx * dst_stride + x_idx;
- const unsigned int sub_var = cpi->fn_ptr[subsize].vf(
- src + src_offset, src_stride, dst + dst_offset, dst_stride, &sse);
- aom_clear_system_state();
- const float var_ratio = (1.0f + (float)sub_var) / (4.0f + (float)var);
- features[f_idx++] = var_ratio;
- }
-}
-
-static void simple_motion_search_based_split_fast(
- AV1_COMP *const cpi, MACROBLOCK *x, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *partition_none_allowed, int *partition_horz_allowed,
- int *partition_vert_allowed, int *do_rectangular_split,
- int *do_square_split) {
- aom_clear_system_state();
- const NN_CONFIG *nn_config = NULL;
- float split_only_thresh = 1.0f;
- if (bsize == BLOCK_128X128) {
- nn_config = &av1_simple_motion_search_based_split_nn_config_128;
- split_only_thresh = av1_simple_motion_search_based_split_thresh_128;
- } else if (bsize == BLOCK_64X64) {
- nn_config = &av1_simple_motion_search_based_split_nn_config_64;
- split_only_thresh = av1_simple_motion_search_based_split_thresh_64;
- } else if (bsize == BLOCK_32X32) {
- nn_config = &av1_simple_motion_search_based_split_nn_config_32;
- split_only_thresh = av1_simple_motion_search_based_split_thresh_32;
- } else if (bsize == BLOCK_16X16) {
- nn_config = &av1_simple_motion_search_based_split_nn_config_16;
- split_only_thresh = av1_simple_motion_search_based_split_thresh_16;
- } else if (bsize == BLOCK_8X8) {
- return;
- } else {
- assert(0 && "Unexpected block size in simple_motion_based_split");
- return;
- }
-
- float features[FEATURE_SIZE_SMS_SPLIT_FAST] = { 0.0f };
- float score = 0.0f;
- get_res_var_features(cpi, x, mi_row, mi_col, bsize, features);
- av1_nn_predict(features, nn_config, &score);
- aom_clear_system_state();
-
- if (score > split_only_thresh) {
- *partition_none_allowed = 0;
- *partition_horz_allowed = 0;
- *partition_vert_allowed = 0;
- *do_rectangular_split = 0;
- }
- if (cpi->sf.simple_motion_search_split_only >= 2) {
- if (score < -split_only_thresh) *do_square_split = 0;
- // For larger scores (>split_only_thresh), none and rectangular partitions
- // are skipped. As score reduces, possibility of split decreases. Hence
- // for near larger scores (.875 * split_only_thresh to split_only_thresh)
- // none partition is disabled, but rectangular partitions are evaluated
- // additionally.
- if (score > (split_only_thresh * 0.875)) *partition_none_allowed = 0;
- }
-}
-
-static int convert_bsize_to_idx(BLOCK_SIZE bsize) {
+static INLINE int convert_bsize_to_idx(BLOCK_SIZE bsize) {
switch (bsize) {
case BLOCK_128X128: return 0;
case BLOCK_64X64: return 1;
@@ -150,15 +41,6 @@
int mi_col, BLOCK_SIZE bsize, int *partition_none_allowed,
int *partition_horz_allowed, int *partition_vert_allowed,
int *do_rectangular_split, int *do_square_split) {
- if (cpi->sf.simple_motion_search_split_speed >= 2) {
- simple_motion_search_based_split_fast(
- cpi, x, mi_row, mi_col, bsize, partition_none_allowed,
- partition_horz_allowed, partition_vert_allowed, do_rectangular_split,
- do_square_split);
-
- return;
- }
-
aom_clear_system_state();
const AV1_COMMON *const cm = &cpi->common;
@@ -204,7 +86,7 @@
*do_rectangular_split = 0;
}
- if (cpi->sf.simple_motion_search_split_only >= 2 && score < no_split_thresh) {
+ if (cpi->sf.simple_motion_search_split >= 2 && score < no_split_thresh) {
*do_square_split = 0;
}
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 5794983..ef312f6 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -117,6 +117,8 @@
sf->ml_early_term_after_part_split_level = 1;
}
+ // TODO(chiyotsai@google.com): Try to replace two pass partition search with
+ // other speed features.
if (is_720p_or_larger && speed >= CONFIG_2PASS_PARTITION_SEARCH_LVL_START &&
speed < CONFIG_2PASS_PARTITION_SEARCH_LVL_END) {
sf->two_pass_partition_search = 1;
@@ -128,7 +130,7 @@
} else if (is_480p_or_larger) {
sf->use_square_partition_only_threshold = BLOCK_64X64;
- sf->simple_motion_search_split_only = 2;
+ sf->simple_motion_search_split = 2;
} else {
sf->use_square_partition_only_threshold = BLOCK_32X32;
}
@@ -141,9 +143,6 @@
sf->ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128
sf->firstpass_simple_motion_search_early_term = 1;
- // TODO(chiyotsai@google.com): Try to disable two pass partition search
- // and turn on hdres
- sf->simple_motion_search_split_speed = 1;
sf->ml_early_term_after_part_split_level = 2;
}
}
@@ -172,7 +171,6 @@
}
if (speed >= 3) {
- sf->simple_motion_search_split_speed = 2;
sf->ml_early_term_after_part_split_level = 0;
if (is_720p_or_larger) {
sf->partition_search_breakout_dist_thr = (1 << 25);
@@ -186,11 +184,12 @@
sf->two_pass_partition_search;
// TODO(Venkat): Clean-up frame type dependency for
- // simple_motion_search_split_only in partition search function and set the
+ // simple_motion_search_split in partition search function and set the
// speed feature accordingly
- // TODO(Venkat): Evaluate this speed feature for speed 1 & 2
- sf->simple_motion_search_split_only =
- cm->allow_screen_content_tools ? 1 : 2;
+ // TODO(any): The models and thresholds used by simple_motion_search_split
+ // are trained and tuned on speed 1 and 2. We might get better performance
+ // if we readjust them for speed 3 and 4.
+ sf->simple_motion_search_split = cm->allow_screen_content_tools ? 1 : 2;
}
if (speed >= 4) {
@@ -269,7 +268,7 @@
// speed.
sf->prune_single_motion_modes_by_simple_trans = 1;
- sf->simple_motion_search_split_only = 1;
+ sf->simple_motion_search_split = 1;
sf->simple_motion_search_early_term_none = 1;
sf->disable_wedge_search_var_thresh = 0;
@@ -742,7 +741,6 @@
sf->skip_obmc_in_uniform_mv_field = 0;
sf->skip_wm_in_uniform_mv_field = 0;
sf->adaptive_interp_filter_search = 0;
- sf->simple_motion_search_split_speed = 2;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
@@ -768,7 +766,7 @@
for (i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
sf->ml_partition_search_breakout_thresh[i] = -1; // -1 means not enabled.
}
- sf->simple_motion_search_split_only = 0;
+ sf->simple_motion_search_split = 0;
sf->simple_motion_search_prune_rect = 0;
sf->simple_motion_search_early_term_none = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 358f4c9..ef86923 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -648,14 +648,11 @@
int simple_motion_search_prune_rect;
// Perform simple motion search before none_partition to decide if we
- // want to split directly without trying other partition types.
- int simple_motion_search_split_only;
-
- // Determines the type of model used by simple_motion_search_split_only. Only
- // valids when simple_motion_search_split_only is >= 1. Set to 1 for the
- // slower model that uses 5 subpixel searches, and 2 for the faster model that
- // uses 1 fullpixel search.
- int simple_motion_search_split_speed;
+ // want to remove all partitions other than PARTITION_SPLIT. If set to 0, this
+ // model is disabled. If set to 1, the model attempts to perform
+ // PARTITION_SPLIT only. If set to 2, the model also attempts to prune
+ // PARTITION_SPLIT.
+ int simple_motion_search_split;
// Use features from simple_motion_search to terminate prediction block
// partition after PARTITION_NONE