Add speed feature obmc_full_pixel_search_level
A full-range full-pixel motion search is done for
motion mode OBMC_CAUSAL, but the search result is
very likely close to the start point (the full-range
search result of SIMPLE_TRANSLATION), so a
small-range refinement is probably sufficient.
This speed feature is controlled by
sf.obmc_full_pixel_search_level.
Enabled at speed level 1 and above.
For speed level 0, a full-range search is still done.
For speed level 1 and above, only a small-range
refinement around the start point is done.
The encoder is about 1.4% faster when encoding
20 frames of BasketballDrill_832x480_50.y4m at 800kbps
on speed 1 (211924 ms -> 208941 ms).
The coding performance gain is 0.01% on average.
STATS_CHANGED expected
Change-Id: I9b5bf28d1a7b2e60d24aa36cedfeb7665e3f3722
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index b3ab40c..7cd93f5 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -2702,11 +2702,12 @@
return best_sad;
}
-int av1_obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int sadpb,
- int further_steps, int do_refine,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv, int is_second) {
+static int obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, int sadpb,
+ int further_steps, int do_refine,
+ const aom_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv,
+ int is_second) {
const int32_t *wsrc = x->wsrc_buf;
const int32_t *mask = x->mask_buf;
MV temp_mv;
@@ -2763,6 +2764,29 @@
return bestsme;
}
+int av1_obmc_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
+ int step_param, int sadpb, int further_steps,
+ int do_refine,
+ const aom_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv, int is_second) {
+ if (cpi->sf.obmc_full_pixel_search_level == 0) {
+ return obmc_full_pixel_diamond(cpi, x, mvp_full, step_param, sadpb,
+ further_steps, do_refine, fn_ptr, ref_mv,
+ dst_mv, is_second);
+ } else {
+ const int32_t *wsrc = x->wsrc_buf;
+ const int32_t *mask = x->mask_buf;
+ const int search_range = 8;
+ *dst_mv = *mvp_full;
+ int thissme = obmc_refining_search_sad(
+ x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second);
+ if (thissme < INT_MAX)
+ thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1,
+ is_second);
+ return thissme;
+ }
+}
+
// Note(yunqingwang): The following 2 functions are only used in the motion
// vector unit test, which return extreme motion vectors allowed by the MV
// limits.
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 532516c..a975218 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -134,11 +134,11 @@
int *cost_list, const MV *ref_mv, int var_max, int rd,
int x_pos, int y_pos, int intra);
-int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param, int sadpb,
- int further_steps, int do_refine,
- const aom_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv, int is_second);
+int av1_obmc_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, int sadpb,
+ int further_steps, int do_refine,
+ const aom_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv, int is_second);
int av1_find_best_obmc_sub_pixel_tree_up(
MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 12e7645..9a1d1f9 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -6920,10 +6920,10 @@
(MI_SIZE * mi_col), (MI_SIZE * mi_row), 0);
break;
case OBMC_CAUSAL:
- bestsme = av1_obmc_full_pixel_diamond(
- cpi, x, &mvp_full, step_param, sadpb,
- MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
- &(x->best_mv.as_mv), 0);
+ bestsme = av1_obmc_full_pixel_search(cpi, x, &mvp_full, step_param, sadpb,
+ MAX_MVSEARCH_STEPS - 1 - step_param,
+ 1, &cpi->fn_ptr[bsize], &ref_mv,
+ &(x->best_mv.as_mv), 0);
break;
default: assert(0 && "Invalid motion mode!\n");
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index a3d34ef..562339d 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -222,6 +222,7 @@
sf->dual_sgr_penalty_level = 1;
sf->use_accurate_subpel_search = 1;
sf->reuse_inter_intra_mode = 1;
+ sf->obmc_full_pixel_search_level = 1;
}
if (speed >= 2) {
@@ -514,6 +515,7 @@
sf->dual_sgr_penalty_level = 0;
sf->inter_mode_rd_model_estimation = 0;
+ sf->obmc_full_pixel_search_level = 0;
if (oxcf->mode == GOOD)
set_good_speed_features_framesize_independent(cpi, sf, oxcf->speed);
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 9238602..8202e03 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -642,6 +642,11 @@
// Reuse the inter_intra_mode search result from NEARESTMV mode to other
// single ref modes
int reuse_inter_intra_mode;
+
+ // Set the full pixel search level of obmc
+ // 0: obmc_full_pixel_diamond
+ // 1: obmc_refining_search_sad (faster)
+ int obmc_full_pixel_search_level;
} SPEED_FEATURES;
struct AV1_COMP;