Improve VBP motion estimation for RTC speed >= 9
For RTC speed >= 9, enabled the evaluation of the neighbours' MVs
for superblocks with low and medium source SAD. This sped up
the RTC encoder.
Borg test results:
avg_psnr: ovr_psnr: ssim: encoding_spdup:
speed 9:
rtc_derf: 0.063 0.059 0.091 0.681
rtc: 0.303 0.302 0.604 1.899
rtc_1080p: 0.082 0.082 0.035 0.639
speed 10:
rtc: 0.342 0.360 0.682 1.817
rtc_1080p: -0.168 -0.166 0.073 0.450
STATS_CHANGED
Change-Id: I837773254d87b82bc064128ac4afa616492a0454
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 3fd1bcc..7e244d0 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1305,6 +1305,10 @@
sf->rt_sf.use_adaptive_subpel_search = false;
}
if (speed >= 10) {
+ // TODO(yunqingwang@google.com): To be conservative, disable
+ // sf->rt_sf.estimate_motion_for_var_based_partition = 3 for speed 10/qvga
+ // for now. May enable it in the future.
+ sf->rt_sf.estimate_motion_for_var_based_partition = 0;
sf->rt_sf.skip_intra_pred = 2;
sf->rt_sf.hybrid_intra_pickmode = 3;
sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
@@ -1443,6 +1447,7 @@
// estimate_motion_for_var_based_partition == 2 helps here.
if (sf->rt_sf.estimate_motion_for_var_based_partition == 2)
sf->rt_sf.estimate_motion_for_var_based_partition = 1;
+ if (speed >= 9) sf->rt_sf.estimate_motion_for_var_based_partition = 0;
}
// Screen settings.
if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
@@ -1506,6 +1511,7 @@
// for screen contents.
if (sf->rt_sf.estimate_motion_for_var_based_partition == 2)
sf->rt_sf.estimate_motion_for_var_based_partition = 1;
+ if (speed >= 9) sf->rt_sf.estimate_motion_for_var_based_partition = 0;
}
}
@@ -1777,7 +1783,7 @@
}
if (speed >= 9) {
sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
- sf->rt_sf.estimate_motion_for_var_based_partition = 0;
+ sf->rt_sf.estimate_motion_for_var_based_partition = 3;
sf->rt_sf.prefer_large_partition_blocks = 3;
sf->rt_sf.skip_intra_pred = 2;
sf->rt_sf.var_part_split_threshold_shift = 9;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index fd475d9..da0fd7a 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1396,6 +1396,7 @@
// 0 - Only use zero MV
// 1 - perform coarse ME
// 2 - perform coarse ME, and also use neighbours' MVs
+ // 3 - use neighbours' MVs without performing coarse ME
int estimate_motion_for_var_based_partition;
// For nonrd_use_partition: mode of extra check of leaf partition
diff --git a/av1/encoder/var_based_part.c b/av1/encoder/var_based_part.c
index f9dd0a7..995b64e 100644
--- a/av1/encoder/var_based_part.c
+++ b/av1/encoder/var_based_part.c
@@ -1210,9 +1210,20 @@
}
}
+static AOM_FORCE_INLINE int mv_distance(const FULLPEL_MV *mv0,
+ const FULLPEL_MV *mv1) {
+ return abs(mv0->row - mv1->row) + abs(mv0->col - mv1->col);
+}
+
static AOM_INLINE void evaluate_neighbour_mvs(AV1_COMP *cpi, MACROBLOCK *x,
unsigned int *y_sad,
- bool is_small_sb) {
+ bool is_small_sb,
+ int est_motion) {
+ const int source_sad_nonrd = x->content_state_sb.source_sad_nonrd;
+ // TODO(yunqingwang@google.com): test if this condition works with other
+ // speeds.
+ if (est_motion > 2 && source_sad_nonrd > kMedSad) return;
+
MACROBLOCKD *xd = &x->e_mbd;
BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
MB_MODE_INFO *mi = xd->mi[0];
@@ -1227,6 +1238,7 @@
// Current best MV
FULLPEL_MV best_mv = get_fullmv_from_mv(&mi->mv[0].as_mv);
+ const int multi = (est_motion > 2 && source_sad_nonrd > kLowSad) ? 7 : 8;
if (xd->up_available) {
const MB_MODE_INFO *above_mbmi = xd->above_mbmi;
@@ -1236,7 +1248,7 @@
clamp_mv(&temp, &subpel_mv_limits);
above_mv = get_fullmv_from_mv(&temp);
- if (above_mv.row != best_mv.row || above_mv.col != best_mv.col) {
+ if (mv_distance(&best_mv, &above_mv) > 0) {
uint8_t const *ref_buf =
get_buf_from_fullmv(&xd->plane[0].pre[0], &above_mv);
above_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
@@ -1253,8 +1265,8 @@
clamp_mv(&temp, &subpel_mv_limits);
left_mv = get_fullmv_from_mv(&temp);
- if ((left_mv.row != best_mv.row || left_mv.col != best_mv.col) &&
- (left_mv.row != above_mv.row || left_mv.col != above_mv.col)) {
+ if (mv_distance(&best_mv, &left_mv) > 0 &&
+ mv_distance(&above_mv, &left_mv) > 0) {
uint8_t const *ref_buf =
get_buf_from_fullmv(&xd->plane[0].pre[0], &left_mv);
left_y_sad = cpi->ppi->fn_ptr[bsize].sdf(
@@ -1264,12 +1276,12 @@
}
}
- if (above_y_sad < *y_sad && above_y_sad < left_y_sad) {
+ if (above_y_sad < ((multi * *y_sad) >> 3) && above_y_sad < left_y_sad) {
*y_sad = above_y_sad;
mi->mv[0].as_mv = get_mv_from_fullmv(&above_mv);
clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
}
- if (left_y_sad < *y_sad && left_y_sad < above_y_sad) {
+ if (left_y_sad < ((multi * *y_sad) >> 3) && left_y_sad < above_y_sad) {
*y_sad = left_y_sad;
mi->mv[0].as_mv = get_mv_from_fullmv(&left_mv);
clamp_mv(&mi->mv[0].as_mv, &subpel_mv_limits);
@@ -1347,7 +1359,10 @@
mi->bsize = cm->seq_params->sb_size;
mi->mv[0].as_int = 0;
mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
- if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition) {
+
+ const int est_motion =
+ cpi->sf.rt_sf.estimate_motion_for_var_based_partition;
+ if (est_motion == 1 || est_motion == 2) {
if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
const MV dummy_mv = { 0, 0 };
*y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params->sb_size,
@@ -1365,9 +1380,8 @@
// Evaluate if neighbours' MVs give better predictions. Zero MV is tested
// already, so only non-zero MVs are tested here. Here the neighbour blocks
// are the first block above or left to this superblock.
- if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition == 2 &&
- (xd->up_available || xd->left_available))
- evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb);
+ if (est_motion >= 2 && (xd->up_available || xd->left_available))
+ evaluate_neighbour_mvs(cpi, x, y_sad, is_small_sb, est_motion);
*y_sad_last = *y_sad;
}