Add skip_repeated_full_newmv speed feature
This CL introduces a new speed feature that prunes the current
ref_mv in NEW_MV mode after a full_pixel search if we have already
encountered another ref_mv in the drl such that:
1. The other ref_mv has the same fullpel_mv during the SIMPLE_TRANSLATION
search process as the current fullpel_mv.
2. The rate needed to encode the current fullpel_mv (including the drl
cost) is no smaller than that for the other ref_mv.
This is enabled at speed 3 and above.
Performance on midres:
SPD_SET | AVG_PSNR | OVR_PSNR | SSIM | SPD |
3 | +0.150% | +0.157% | +0.084% | +3.8% |
4 | +0.129% | +0.133% | +0.114% | +3.6% |
5 | +0.120% | +0.128% | +0.135% | +3.1% |
STATS_CHANGED
Change-Id: Ic29d553437cba15fdac66ef9ed304df19ad59df2
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 3a0fd2e..870a308 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -20,7 +20,7 @@
void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
- int search_range) {
+ int search_range, inter_mode_info *mode_info) {
MACROBLOCKD *xd = &x->e_mbd;
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
@@ -150,9 +150,50 @@
x->mv_limits = tmp_mv_limits;
+ // Terminate search with the current ref_mv if we have already encountered
+ // another ref_mv in the drl such that:
+ // 1. The other ref_mv has the same fullpel_mv during the SIMPLE_TRANSLATION
+ // search process as the current fullpel_mv.
+ // 2. The rate needed to encode the current fullpel_mv (including the drl
+ // cost) is no smaller than that for the other ref_mv.
+ if (cpi->sf.inter_sf.skip_repeated_full_newmv &&
+ mbmi->motion_mode == SIMPLE_TRANSLATION &&
+ x->best_mv.as_int != INVALID_MV) {
+ int_mv this_mv;
+ this_mv.as_mv = get_mv_from_fullmv(&x->best_mv.as_fullmv);
+ const int ref_mv_idx = mbmi->ref_mv_idx;
+ const int this_mv_rate =
+ av1_mv_bit_cost(&this_mv.as_mv, &ref_mv, x->nmv_vec_cost,
+ x->mv_cost_stack, MV_COST_WEIGHT);
+ mode_info[ref_mv_idx].full_search_mv.as_int = this_mv.as_int;
+ mode_info[ref_mv_idx].full_mv_rate = this_mv_rate;
+
+ for (int prev_ref_idx = 0; prev_ref_idx < ref_mv_idx; ++prev_ref_idx) {
+ // Check if the motion search result is the same as a previous result
+ if (this_mv.as_int == mode_info[prev_ref_idx].full_search_mv.as_int) {
+ // Compare the rate cost
+ const int prev_rate_cost = mode_info[prev_ref_idx].full_mv_rate +
+ mode_info[prev_ref_idx].drl_cost;
+ const int this_rate_cost =
+ this_mv_rate + mode_info[ref_mv_idx].drl_cost;
+
+ if (prev_rate_cost <= this_rate_cost) {
+ // If the current rate_cost is no better than the previous rate_cost,
+ // then we terminate the search. Since av1_single_motion_search is only
+ // called by handle_newmv in SIMPLE_TRANSLATION mode, we set the
+ // best_mv to INVALID_MV to signal that we wish to terminate search
+ // for the current mode.
+ x->best_mv.as_int = INVALID_MV;
+ return;
+ }
+ }
+ }
+ }
+
if (cpi->common.cur_frame_force_integer_mv) {
convert_fullmv_to_mv(&x->best_mv);
}
+
const int use_fractional_mv =
bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0;
if (use_fractional_mv) {
diff --git a/av1/encoder/motion_search_facade.h b/av1/encoder/motion_search_facade.h
index 960df34..bf5d635 100644
--- a/av1/encoder/motion_search_facade.h
+++ b/av1/encoder/motion_search_facade.h
@@ -18,9 +18,20 @@
extern "C" {
#endif
+typedef struct {  // One record per ref_mv_idx (used as mode_info[ref_mv_idx]).
+ int64_t rd;  // Initialized to INT64_MAX for each ref_mv_idx.
+ int drl_cost;  // Added to full_mv_rate when comparing ref_mv candidates.
+
+ int rate_mv;  // NOTE(review): presumably the rate of `mv` -- confirm.
+ int_mv mv;  // Initialized to INVALID_MV for each ref_mv_idx.
+
+ int_mv full_search_mv;  // Full-pel search result as an MV, or INVALID_MV.
+ int full_mv_rate;  // av1_mv_bit_cost() of full_search_mv against ref_mv.
+} inter_mode_info;
+
void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
- int search_range);
+ int search_range, inter_mode_info *mode_info);
void av1_joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int_mv *cur_mv,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 083e348..0c170d0 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1055,8 +1055,8 @@
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
const BLOCK_SIZE bsize, int_mv *cur_mv,
- int *const rate_mv,
- HandleInterModeArgs *const args) {
+ int *const rate_mv, HandleInterModeArgs *const args,
+ inter_mode_info *mode_info) {
const MACROBLOCKD *const xd = &x->e_mbd;
const MB_MODE_INFO *const mbmi = xd->mi[0];
const int is_comp_pred = has_second_ref(mbmi);
@@ -1165,7 +1165,8 @@
}
}
- av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range);
+ av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
+ mode_info);
if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
args->single_newmv[ref_mv_idx][refs[0]] = x->best_mv;
@@ -1358,7 +1359,7 @@
const uint32_t cur_mv = mbmi->mv[0].as_int;
assert(!is_comp_pred);
if (have_newmv_in_inter_mode(this_mode)) {
- av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX);
+ av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL);
mbmi->mv[0].as_int = x->best_mv.as_int;
tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
}
@@ -1890,13 +1891,6 @@
return false;
}
-typedef struct {
- int64_t rd;
- int drl_cost;
- int rate_mv;
- int_mv mv;
-} inter_mode_info;
-
// Compute the estimated RD cost for the motion vector with simple translation.
static int64_t simple_translation_pred_rd(
AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats,
@@ -2129,6 +2123,7 @@
const int base_rate =
args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
+ mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
mode_info[ref_mv_idx].mv.as_int = INVALID_MV;
mode_info[ref_mv_idx].rd = INT64_MAX;
if (!mask_check_bit(idx_mask, ref_mv_idx)) {
@@ -2175,7 +2170,8 @@
cur_mv[0] = args->single_newmv[ref_mv_idx][ref0];
rate_mv = args->single_newmv_rate[ref_mv_idx][ref0];
} else {
- newmv_ret_val = handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args);
+ newmv_ret_val =
+ handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
}
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, handle_newmv_time);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 2adb00e..d5c9bc9 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -445,6 +445,7 @@
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS;
sf->mv_sf.search_method = DIAMOND;
+
sf->inter_sf.disable_sb_level_mv_cost_upd = 1;
// TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
// it with cpi->sf.disable_wedge_search_var_thresh.
@@ -456,6 +457,7 @@
sf->inter_sf.prune_motion_mode_level = boosted ? 2 : 3;
sf->inter_sf.selective_ref_frame = 4;
sf->inter_sf.skip_repeated_ref_mv = 1;
+ sf->inter_sf.skip_repeated_full_newmv = 1;
if (cpi->oxcf.enable_smooth_interintra)
sf->inter_sf.disable_smooth_interintra = boosted ? 0 : 1;
sf->inter_sf.reuse_compound_type_decision = 1;
@@ -966,6 +968,7 @@
inter_sf->prune_comp_search_by_single_result = 0;
inter_sf->skip_repeated_ref_mv = 0;
inter_sf->skip_repeated_newmv = 0;
+ inter_sf->skip_repeated_full_newmv = 0;
inter_sf->prune_single_motion_modes_by_simple_trans = 0;
inter_sf->inter_mode_rd_model_estimation = 0;
inter_sf->prune_compound_using_single_ref = 0;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 3facef7..bb5cb6d 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -523,6 +523,14 @@
// flag to skip NEWMV mode in drl if the motion search result is the same
int skip_repeated_newmv;
+ // Skip the current ref_mv in NEW_MV mode if we have already encountered
+ // another ref_mv in the drl such that:
+ // 1. The other ref_mv has the same fullpel_mv during the SIMPLE_TRANSLATION
+ // search process as the current fullpel_mv.
+ // 2. The rate needed to encode the current fullpel_mv (including the drl
+ // cost) is no smaller than that for the other ref_mv.
+ int skip_repeated_full_newmv;
+
// This speed feature checks duplicate ref MVs among NEARESTMV, NEARMV,
// GLOBALMV and skips NEARMV or GLOBALMV (in order) if a duplicate is found
// TODO(any): Instead of skipping repeated ref mv, use the recalculated