Use better motion search for temporal filtering
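Replace the hex search used by temporal filtering with
av1_full_pixel_search(), using NSTEP followed by a mesh search.
This is ported from the following VP9 changes: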
https://chromium-review.googlesource.com/c/webm/libvpx/+/1154488
https://chromium-review.googlesource.com/c/webm/libvpx/+/1157910
Tested encoding performance on the lowres and midres test sets with 30 frames.
Coding gains (ovr_psnr):
speed 0: lowres 0.06% midres 0.36%
speed 1: lowres 0.05% midres 0.29%
STATS_CHANGED
Change-Id: Ib30455465b34215285ca53603cf080c3b410504d
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index d93e01a..ee58802 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -2100,11 +2100,11 @@
}
int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- MV *mvp_full, int step_param, int error_per_bit,
+ MV *mvp_full, int step_param, int method,
+ int run_mesh_search, int error_per_bit,
int *cost_list, const MV *ref_mv, int var_max, int rd,
int x_pos, int y_pos, int intra) {
const SPEED_FEATURES *const sf = &cpi->sf;
- const SEARCH_METHODS method = sf->mv.search_method;
const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
int var = 0;
@@ -2169,11 +2169,35 @@
default: assert(0 && "Invalid search method.");
}
+ // Should we allow a follow on exhaustive search?
+ if (!run_mesh_search) {
+ if (method == NSTEP) {
+ if (is_exhaustive_allowed(cpi, x)) {
+ int exhuastive_thr = sf->exhaustive_searches_thresh;
+ exhuastive_thr >>=
+ 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);
+ // Threshold variance for an exhaustive full search.
+ if (var > exhuastive_thr) run_mesh_search = 1;
+ }
+ }
+ }
+
+ if (run_mesh_search) {
+ int var_ex;
+ MV tmp_mv_ex;
+ var_ex = full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
+ cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
+ if (var_ex < var) {
+ var = var_ex;
+ x->best_mv.as_mv = tmp_mv_ex;
+ }
+ }
+
if (method != NSTEP && rd && var < var_max)
var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
do {
- if (!av1_use_hash_me(&cpi->common)) break;
+ if (!intra || !av1_use_hash_me(&cpi->common)) break;
// already single ME
// get block size and original buffer of current block
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 539e8f4..7df15d6 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -120,8 +120,9 @@
int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *mvp_full, int step_param,
- int error_per_bit, int *cost_list, const MV *ref_mv,
- int var_max, int rd, int x_pos, int y_pos, int intra);
+ int method, int run_mesh_search, int error_per_bit,
+ int *cost_list, const MV *ref_mv, int var_max, int rd,
+ int x_pos, int y_pos, int intra);
int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
MV *mvp_full, int step_param, int sadpb,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index c0aa935..f062870 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7014,10 +7014,10 @@
switch (mbmi->motion_mode) {
case SIMPLE_TRANSLATION:
- bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
- sadpb, cond_cost_list(cpi, cost_list),
- &ref_mv, INT_MAX, 1, (MI_SIZE * mi_col),
- (MI_SIZE * mi_row), 0);
+ bestsme = av1_full_pixel_search(
+ cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0,
+ sadpb, cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1,
+ (MI_SIZE * mi_col), (MI_SIZE * mi_row), 0);
break;
case OBMC_CAUSAL:
bestsme = av1_obmc_full_pixel_diamond(
@@ -9673,8 +9673,8 @@
int sadpb = x->sadperbit16;
int cost_list[5];
int bestsme = av1_full_pixel_search(
- cpi, x, bsize, &mvp_full, step_param, sadpb,
- cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
+ cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0,
+ sadpb, cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
(MI_SIZE * mi_col), (MI_SIZE * mi_row), 1);
x->mv_limits = tmp_mv_limits;
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index d335dfc..054eea3 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -218,7 +218,8 @@
static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi,
uint8_t *arf_frame_buf,
uint8_t *frame_ptr_buf,
- int stride) {
+ int stride, int x_pos,
+ int y_pos) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
@@ -254,11 +255,9 @@
x->mvcost = x->mv_cost_stack;
x->nmvjointcost = x->nmv_vec_cost;
- // Use mv costing from x->mvcost directly
- av1_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
- cond_cost_list(cpi, cost_list), &cpi->fn_ptr[BLOCK_16X16], 0,
- &best_ref_mv1);
-
+ av1_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
+ NSTEP, 1, sadpb, cond_cost_list(cpi, cost_list),
+ &best_ref_mv1, 0, 0, x_pos, y_pos, 0);
x->mv_limits = tmp_mv_limits;
// Ignore mv costing by sending NULL pointer instead of cost array
@@ -374,7 +373,8 @@
// Find best match in this frame by MC
int err = temporal_filter_find_matching_mb_c(
cpi, frames[alt_ref_index]->y_buffer + mb_y_offset,
- frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride);
+ frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride,
+ mb_col * 16, mb_row * 16);
// Assign higher weight to matching MB if it's error
// score is lower. If not applying MC default behavior