RTC: Adaptively downgrade subpixel search
This commit adds the use_adaptive_subpel_search speed feature, which
downgrades the subpel search to av1_find_best_sub_pixel_tree_pruned_more
when either x->content_state_sb.source_sad_nonrd <= kLowSad, or the
corresponding fullpel search returns low residue variance.
For reference, if the encoder always used
av1_find_best_sub_pixel_tree_pruned_more, encoding time would drop by
about 1.97% at the cost of a 0.366% BD-rate loss.
Performance with the adaptive downgrade in this commit:
| SPD_SET | TESTSET | AVG_PSNR | OVR_PSNR | SSIM | ENC_T |
|---------|----------|----------|----------|---------|-------|
| 7 | rtc | +0.015% | +0.039% | +0.085% | -1.2% |
| 7 | rtc_derf | +0.018% | +0.051% | -0.008% | -1.1% |
|---------|----------|----------|----------|---------|-------|
| 8 | rtc | -0.027% | -0.051% | -0.073% | -1.3% |
| 8 | rtc_derf | -0.286% | -0.293% | -0.310% | -1.1% |
STATS_CHANGED
Change-Id: I623f138694c1e85b1ac8112027d6adbaec6517d7
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 47dbf87..39c2a87 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -266,10 +266,11 @@
MACROBLOCKD *xd = &x->e_mbd;
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
+ const SPEED_FEATURES *sf = &cpi->sf;
MB_MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
- int step_param = (cpi->sf.rt_sf.fullpel_search_step_param)
- ? cpi->sf.rt_sf.fullpel_search_step_param
+ int step_param = (sf->rt_sf.fullpel_search_step_param)
+ ? sf->rt_sf.fullpel_search_step_param
: cpi->mv_search_params.mv_step_param;
FULLPEL_MV start_mv;
const int ref = mi->ref_frame[0];
@@ -299,7 +300,7 @@
else
center_mv = tmp_mv->as_mv;
- const SEARCH_METHODS search_method = cpi->sf.mv_sf.search_method;
+ const SEARCH_METHODS search_method = sf->mv_sf.search_method;
const MotionVectorSearchParams *mv_search_params = &cpi->mv_search_params;
const int ref_stride = xd->plane[0].pre[0].stride;
const search_site_config *src_search_sites = av1_get_search_site_config(
@@ -326,27 +327,37 @@
SUBPEL_MOTION_SEARCH_PARAMS ms_params;
av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv,
cost_list);
- if (cpi->sf.rt_sf.reduce_mv_pel_precision &&
- cpi->sf.mv_sf.subpel_force_stop < HALF_PEL)
+ if (sf->rt_sf.reduce_mv_pel_precision &&
+ sf->mv_sf.subpel_force_stop < HALF_PEL)
ms_params.forced_stop = subpel_select(cpi, x, bsize, tmp_mv);
- if (cpi->sf.rt_sf.reduce_zeromv_mvres && ref_mv.row == 0 &&
- ref_mv.col == 0 && start_mv.row == 0 && start_mv.col == 0) {
+ const bool fullpel_performed_well =
+ (bsize == BLOCK_64X64 && full_var_rd * 40 < 62267 * 7) ||
+ (bsize == BLOCK_32X32 && full_var_rd * 8 < 42380) ||
+ (bsize == BLOCK_16X16 && full_var_rd * 8 < 10127);
+ if (sf->rt_sf.reduce_zeromv_mvres && ref_mv.row == 0 && ref_mv.col == 0 &&
+ start_mv.row == 0 && start_mv.col == 0) {
// If both the refmv and the fullpel results show zero mv, then there is
// high likelihood that the current block is static. So we can try to
// reduce the mv resolution here.
// These thresholds are the mean var rd collected from multiple encoding
// runs.
- if ((bsize == BLOCK_64X64 && full_var_rd * 40 < 62267 * 7) ||
- (bsize == BLOCK_32X32 && full_var_rd * 8 < 42380) ||
- (bsize == BLOCK_16X16 && full_var_rd * 8 < 10127)) {
+ if (fullpel_performed_well) {
ms_params.forced_stop = HALF_PEL;
}
}
MV subpel_start_mv = get_mv_from_fullmv(&tmp_mv->as_fullmv);
- cpi->mv_search_params.find_fractional_mv_step(
- xd, cm, &ms_params, subpel_start_mv, &tmp_mv->as_mv, &dis,
- &x->pred_sse[ref], NULL);
+ if (sf->rt_sf.use_adaptive_subpel_search &&
+ (fullpel_performed_well ||
+ x->content_state_sb.source_sad_nonrd <= kLowSad)) {
+ av1_find_best_sub_pixel_tree_pruned_more(xd, cm, &ms_params,
+ subpel_start_mv, &tmp_mv->as_mv,
+ &dis, &x->pred_sse[ref], NULL);
+ } else {
+ cpi->mv_search_params.find_fractional_mv_step(
+ xd, cm, &ms_params, subpel_start_mv, &tmp_mv->as_mv, &dis,
+ &x->pred_sse[ref], NULL);
+ }
*rate_mv =
av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->mv_costs->nmv_joint_cost,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index f6fc32a..95340cc 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1679,6 +1679,7 @@
sf->rt_sf.prune_compoundmode_with_singlecompound_var = true;
sf->rt_sf.prune_compoundmode_with_singlemode_var = true;
sf->rt_sf.skip_compound_based_on_var = true;
+ sf->rt_sf.use_adaptive_subpel_search = true;
}
if (speed >= 8) {
@@ -1709,6 +1710,7 @@
sf->rt_sf.frame_level_mode_cost_update = true;
sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
sf->rt_sf.reduce_mv_pel_precision = 0;
+ sf->rt_sf.use_adaptive_subpel_search = false;
// For multi-thread use case with row_mt enabled, enable top right
// dependency wait of threads at mi level.
if ((cpi->oxcf.row_mt == 1) && (cpi->mt_info.num_workers > 1)) {
@@ -2062,6 +2064,7 @@
rt_sf->skip_compound_based_on_var = false;
rt_sf->top_right_sync_wait_in_mis = false;
rt_sf->set_zeromv_skip_based_on_source_sad = 1;
+ rt_sf->use_adaptive_subpel_search = false;
}
// Populate appropriate sub-pel search method based on speed feature and user
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 0c23572..6261b4c 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1623,6 +1623,10 @@
// 1: If source sad is kZeroSad
// 2: If source sad <= kVeryLowSad
int set_zeromv_skip_based_on_source_sad;
+
+ // Downgrades the subpel search to av1_find_best_sub_pixel_tree_pruned_more
+ // when either the fullpel search performed well, or when zeromv has low sad.
+ bool use_adaptive_subpel_search;
} REAL_TIME_SPEED_FEATURES;
/*!\endcond */