Refactor subpel motion estimation
Introduce an early exit in the subpel motion search around
second_best_mv: the search returns early when an iteration starts from
a subpel mv already recorded during the search around best_mv.
When tested on 20 frames of BasketBallDrill_832x480_50 at 1 Mbps,
encoder time was reduced by ~0.5% for the speed=1 preset.
Change-Id: If721fb1cae4eccdb6616d3d7e8fc377aea913bd8
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index bf95d2b..98691d9 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -368,6 +368,9 @@
// Store the second best motion vector during full-pixel motion search
int_mv second_best_mv;
+ // Best mv at the start of each subpel search iteration (used for early exit)
+ int_mv fractional_best_mv[3];
+
// use default transform and skip transform type search for intra modes
int use_default_intra_tx_type;
// use default transform and skip transform type search for inter modes
diff --git a/av1/encoder/mbgraph.c b/av1/encoder/mbgraph.c
index 1a35ff7..f0b537a 100644
--- a/av1/encoder/mbgraph.c
+++ b/av1/encoder/mbgraph.c
@@ -60,7 +60,7 @@
x, &cpi->common, mb_row, mb_col, ref_mv,
cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0,
mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL,
- NULL, &distortion, &sse, NULL, NULL, 0, 0, 0, 0, 0);
+ NULL, &distortion, &sse, NULL, NULL, 0, 0, 0, 0, 0, 1);
}
if (has_second_ref(xd->mi[0]))
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 4f3c4e2..f11c4a0 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -409,7 +409,7 @@
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
+ int use_accurate_subpel_search, const int do_reset_fractional_mv) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
src_address, src_stride, y, y_stride,
@@ -426,6 +426,7 @@
(void)cm;
(void)mi_row;
(void)mi_col;
+ (void)do_reset_fractional_mv;
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
@@ -481,12 +482,13 @@
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
+ int use_accurate_subpel_search, const int do_reset_fractional_mv) {
SETUP_SUBPEL_SEARCH;
(void)use_accurate_subpel_search;
(void)cm;
(void)mi_row;
(void)mi_col;
+ (void)do_reset_fractional_mv;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
src_address, src_stride, y, y_stride,
@@ -549,12 +551,13 @@
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
+ int use_accurate_subpel_search, const int do_reset_fractional_mv) {
SETUP_SUBPEL_SEARCH;
(void)use_accurate_subpel_search;
(void)cm;
(void)mi_row;
(void)mi_col;
+ (void)do_reset_fractional_mv;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp,
src_address, src_stride, y, y_stride,
@@ -750,7 +753,7 @@
int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
+ int use_accurate_subpel_search, const int do_reset_fractional_mv) {
const uint8_t *const src_address = x->plane[0].src.buf;
const int src_stride = x->plane[0].src.stride;
MACROBLOCKD *xd = &x->e_mbd;
@@ -796,7 +799,16 @@
(void)cost_list; // to silence compiler warning
+ if (do_reset_fractional_mv) {
+ av1_set_fractional_mv(x->fractional_best_mv);
+ }
+
for (iter = 0; iter < round; ++iter) {
+ if ((x->fractional_best_mv[iter].as_mv.row == br) &&
+ (x->fractional_best_mv[iter].as_mv.col == bc))
+ return INT_MAX;
+ x->fractional_best_mv[iter].as_mv.row = br;
+ x->fractional_best_mv[iter].as_mv.col = bc;
// Check vertical and horizontal sub-pixel positions.
for (idx = 0; idx < 4; ++idx) {
tr = br + search_step[idx].row;
@@ -2827,16 +2839,14 @@
(void)thismse; \
(void)cost_list;
// Return the maximum MV.
-int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm,
- int mi_row, int mi_col, const MV *ref_mv,
- int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
+int av1_return_max_sub_pixel_mv(
+ MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
+ const MV *ref_mv, int allow_hp, int error_per_bit,
+ const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h,
+ int use_accurate_subpel_search, const int do_reset_fractional_mv) {
COMMON_MV_TEST;
(void)mask;
(void)mask_stride;
@@ -2847,6 +2857,7 @@
(void)cm;
(void)mi_row;
(void)mi_col;
+ (void)do_reset_fractional_mv;
bestmv->row = maxr;
bestmv->col = maxc;
@@ -2857,16 +2868,14 @@
return besterr;
}
// Return the minimum MV.
-int av1_return_min_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm,
- int mi_row, int mi_col, const MV *ref_mv,
- int allow_hp, int error_per_bit,
- const aom_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- int *distortion, unsigned int *sse1,
- const uint8_t *second_pred, const uint8_t *mask,
- int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search) {
+int av1_return_min_sub_pixel_mv(
+ MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col,
+ const MV *ref_mv, int allow_hp, int error_per_bit,
+ const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
+ int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
+ unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask,
+ int mask_stride, int invert_mask, int w, int h,
+ int use_accurate_subpel_search, const int do_reset_fractional_mv) {
COMMON_MV_TEST;
(void)maxr;
(void)maxc;
@@ -2877,6 +2886,7 @@
(void)cm;
(void)mi_row;
(void)mi_col;
+ (void)do_reset_fractional_mv;
bestmv->row = minr;
bestmv->col = minc;
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index a975218..3f8b3b1 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -103,7 +103,7 @@
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
int *distortion, unsigned int *sse1, const uint8_t *second_pred,
const uint8_t *mask, int mask_stride, int invert_mask, int w, int h,
- int use_accurate_subpel_search);
+ int use_accurate_subpel_search, const int do_reset_fractional_mv);
extern fractional_mv_step_fp av1_find_best_sub_pixel_tree;
extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned;
@@ -154,6 +154,12 @@
int mi_row, int mi_col, int *pts0,
int *pts_inref0, int total_samples);
+static INLINE void av1_set_fractional_mv(int_mv *fractional_best_mv) {
+ for (int z = 0; z < 3; z++) {  // 3 == size of fractional_best_mv[] in block.h
+ fractional_best_mv[z].as_int = INVALID_MV;  // reset this slot
+ }
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index a018d8b..f5a1196 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -6593,7 +6593,7 @@
cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
- mask_stride, id, pw, ph, cpi->sf.use_accurate_subpel_search);
+ mask_stride, id, pw, ph, cpi->sf.use_accurate_subpel_search, 1);
}
// Restore the pointer to the first prediction buffer.
@@ -6958,13 +6958,12 @@
x->second_best_mv.as_int != x->best_mv.as_int;
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
-
best_mv_var = cpi->find_fractional_mv_step(
x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
- 0, 0, pw, ph, 1);
+ 0, 0, pw, ph, 1, 1);
if (try_second) {
const int minc =
@@ -6989,7 +6988,7 @@
cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step,
cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
- &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1);
+ &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1, 0);
if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
x->best_mv.as_mv = best_mv;
}
@@ -7000,7 +6999,7 @@
x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
- 0, 0, 0, 0, 0);
+ 0, 0, 0, 0, 0, 1);
}
break;
case OBMC_CAUSAL:
@@ -7176,7 +7175,7 @@
cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
- ref_idx, pw, ph, cpi->sf.use_accurate_subpel_search);
+ ref_idx, pw, ph, cpi->sf.use_accurate_subpel_search, 1);
}
// Restore the pointer to the first unscaled prediction buffer.
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 75fdf02..374ea23 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -259,7 +259,7 @@
cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step,
cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL,
- NULL, 0, 0, 0, 0, 0);
+ NULL, 0, 0, 0, 0, 0, 1);
}
x->e_mbd.mi[0]->mv[0] = x->best_mv;