Reduce full pixel motion search range
1. Added a speed 5 feature that reduces the single-reference motion search
range based on how close the ref_mvs are to each other and on the mv result
obtained for a prior ref_mv.
2. Corrected the call to set_mv_search_params(). This also gave some
speedups at lower speeds.
3. More work will follow.
Borg test results (150f):
avg_psnr: ovr_psnr: ssim: speedup:
speed 5:
lowres: -0.018 -0.025 -0.015 0.9%
midres: 0.029 0.027 0.025 1.0%
speed 2:
lowres: -0.003 -0.007 0.023 0.6%
midres: -0.001 0.002 0.032 0.6%
STATS_CHANGED
Change-Id: I8d0688ebcc10c417145aaf784f478001b577db68
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index 1b24f88..d764ac1 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c
@@ -192,7 +192,7 @@
// If auto_mv_step_size is enabled then keep track of the largest
// motion vector component used.
if (cpi->sf.mv_sf.auto_mv_step_size) {
- unsigned int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
+ int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
cpi->max_mv_magnitude = AOMMAX(maxv, cpi->max_mv_magnitude);
}
}
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 64d0513..d178056e 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3906,7 +3906,7 @@
static void set_mv_search_params(AV1_COMP *cpi) {
const AV1_COMMON *const cm = &cpi->common;
- const unsigned int max_mv_def = AOMMIN(cm->width, cm->height);
+ const int max_mv_def = AOMMAX(cm->width, cm->height);
// Default based on max resolution.
cpi->mv_step_param = av1_init_search_range(max_mv_def);
@@ -3917,14 +3917,15 @@
// after a key/intra-only frame.
cpi->max_mv_magnitude = max_mv_def;
} else {
- if (cm->show_frame) {
+ // Use cpi->max_mv_magnitude == -1 to exclude first pass case.
+ if (cm->show_frame && cpi->max_mv_magnitude != -1) {
// Allow mv_steps to correspond to twice the max mv magnitude found
// in the previous frame, capped by the default max_mv_magnitude based
// on resolution.
cpi->mv_step_param = av1_init_search_range(
AOMMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
}
- cpi->max_mv_magnitude = 0;
+ cpi->max_mv_magnitude = -1;
}
}
}
@@ -4271,13 +4272,14 @@
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int ref_frame;
+ // TODO(yunqing): The following condition seems not work. Need to investigate.
if (width != cm->width || height != cm->height) {
// There has been a change in the encoded frame size
av1_set_size_literal(cpi, width, height);
- set_mv_search_params(cpi);
// Recalculate 'all_lossless' in case super-resolution was (un)selected.
cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
}
+ set_mv_search_params(cpi);
if (is_stat_consumption_stage(cpi)) {
av1_set_target_rate(cpi, cm->width, cm->height);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 0c9cc7c..ce16c7f 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -901,7 +901,7 @@
// sf contains fine-grained config set internally based on speed
SPEED_FEATURES sf;
- unsigned int max_mv_magnitude;
+ int max_mv_magnitude;
int mv_step_param;
int all_one_sided_refs;
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 884a44d..031c7cd 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -1891,8 +1891,7 @@
point as the best match, we will do a final 1-away diamond
refining search */
static int full_pixel_diamond(MACROBLOCK *x, MV *mvp_full, int step_param,
- int use_var, int sadpb, int further_steps,
- int *cost_list,
+ int use_var, int sadpb, int *cost_list,
const aom_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, const search_site_config *cfg,
uint8_t *second_pred, uint8_t *mask,
@@ -1920,6 +1919,7 @@
// If there won't be more n-step search, check to see if refining search is
// needed.
+ const int further_steps = cfg->ss_count - 1 - step_param;
while (n < further_steps) {
++n;
@@ -2379,9 +2379,9 @@
break;
case NSTEP:
case DIAMOND:
- var = full_pixel_diamond(x, mvp_full, step_param, use_var, error_per_bit,
- cfg->ss_count - 1 - step_param, cost_list,
- fn_ptr, ref_mv, cfg, NULL, NULL, 0, 0);
+ var =
+ full_pixel_diamond(x, mvp_full, step_param, use_var, error_per_bit,
+ cost_list, fn_ptr, ref_mv, cfg, NULL, NULL, 0, 0);
break;
default: assert(0 && "Invalid search method.");
}
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 3f383fd..0063ac1 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -15,7 +15,8 @@
#include "av1/encoder/reconinter_enc.h"
void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int ref_idx, int *rate_mv) {
+ BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
+ int search_range) {
MACROBLOCKD *xd = &x->e_mbd;
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
@@ -93,6 +94,21 @@
}
const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
+
+ // Further reduce the search range.
+ if (search_range < INT_MAX) {
+ const search_site_config *ss_cfg = &cpi->ss_cfg[SS_CFG_SRC];
+ // Max step_param is ss_cfg->ss_count.
+ if (search_range < 1) {
+ step_param = ss_cfg->ss_count;
+ } else {
+ while (ss_cfg->radius[ss_cfg->ss_count - step_param - 1] >
+ (search_range << 1) &&
+ ss_cfg->ss_count - step_param - 1 > 0)
+ step_param++;
+ }
+ }
+
// Note: MV limits are modified here. Always restore the original values
// after full-pixel motion search.
av1_set_mv_search_range(&x->mv_limits, &ref_mv);
@@ -631,4 +647,4 @@
mbmi->mv[which].as_int = tmp_mv[which].as_int;
}
return tmp_rate_mv;
-}
\ No newline at end of file
+}
diff --git a/av1/encoder/motion_search_facade.h b/av1/encoder/motion_search_facade.h
index fda7a1f..4bfe06d 100644
--- a/av1/encoder/motion_search_facade.h
+++ b/av1/encoder/motion_search_facade.h
@@ -19,7 +19,8 @@
#endif
void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, int ref_idx, int *rate_mv);
+ BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
+ int search_range);
void av1_joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int_mv *cur_mv,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index fe1c359..bd1a800 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1499,7 +1499,43 @@
}
}
} else {
- av1_single_motion_search(cpi, x, bsize, 0, rate_mv);
+ // Single ref case.
+ const int ref_idx = 0;
+ int search_range = INT_MAX;
+
+ if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
+ const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
+ int min_mv_diff = INT_MAX;
+ int best_match = -1;
+ MV prev_ref_mv[2] = { { 0 } };
+ for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
+ prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
+ idx, x->mbmi_ext)
+ .as_mv;
+ const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
+ abs(ref_mv.col - prev_ref_mv[idx].col));
+
+ if (min_mv_diff > ref_mv_diff) {
+ min_mv_diff = ref_mv_diff;
+ best_match = idx;
+ }
+ }
+
+ if (min_mv_diff < (16 << 3)) {
+ if (args->single_newmv_valid[best_match][refs[0]]) {
+ search_range = min_mv_diff;
+ search_range +=
+ AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
+ prev_ref_mv[best_match].row),
+ abs(args->single_newmv[best_match][refs[0]].as_mv.col -
+ prev_ref_mv[best_match].col));
+ // Get full pixel search range.
+ search_range = (search_range + 4) >> 3;
+ }
+ }
+ }
+
+ av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range);
if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
args->single_newmv[ref_mv_idx][refs[0]] = x->best_mv;
@@ -1671,7 +1707,7 @@
const uint32_t cur_mv = mbmi->mv[0].as_int;
assert(!is_comp_pred);
if (have_newmv_in_inter_mode(this_mode)) {
- av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv);
+ av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX);
mbmi->mv[0].as_int = x->best_mv.as_int;
tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index d80914e..ce1ecd1 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -558,6 +558,8 @@
sf->inter_sf.disable_onesided_comp = 1;
sf->lpf_sf.disable_lr_filter = 1;
+
+ sf->mv_sf.reduce_search_range = 1;
}
}
@@ -920,6 +922,7 @@
mv_sf->adaptive_motion_search = 0;
mv_sf->use_accurate_subpel_search = USE_8_TAPS;
mv_sf->disable_hash_me = 0;
+ mv_sf->reduce_search_range = 0;
}
static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 08ba89c..e6b2f29 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -480,6 +480,9 @@
// Use to control hash generation and use of the same
// Applicable only for screen contents
int disable_hash_me;
+
+ // Reduce single motion search range based on MV result of prior ref_mv_idx.
+ int reduce_search_range;
} MV_SPEED_FEATURES;
typedef struct INTER_MODE_SPEED_FEATURES {
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 20b9d4d..943b0fa 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -89,7 +89,7 @@
const search_site_config ss_cfg = cpi->ss_cfg[SS_CFG_LOOKAHEAD];
const SEARCH_METHODS full_search_method = NSTEP;
const int step_param = av1_init_search_range(
- AOMMIN(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
+ AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
const int allow_high_precision_mv = cpi->common.allow_high_precision_mv;
const int subpel_iters_per_step = cpi->sf.mv_sf.subpel_iters_per_step;