Skip redundant SAD calculation
Redundant SAD calculation in the function
av1_refining_search_8p_c is skipped during
8 point motion vector refinement. When tested
for 10 frames of parkrun 720p50 content with
speed=1 preset, 0.3% encode time reduction is
seen.
Change-Id: I380d368a20d704ee62fb657428b60f85aad21083
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 6094aa8..a33aa24 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -2021,8 +2021,16 @@
const uint8_t *mask, int mask_stride,
int invert_mask, const MV *center_mv,
const uint8_t *second_pred) {
- const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
- { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
+ static const search_neighbors neighbors[8] = {
+ { { -1, 0 }, -1 * SEARCH_GRID_STRIDE_8P + 0 },
+ { { 0, -1 }, 0 * SEARCH_GRID_STRIDE_8P - 1 },
+ { { 0, 1 }, 0 * SEARCH_GRID_STRIDE_8P + 1 },
+ { { 1, 0 }, 1 * SEARCH_GRID_STRIDE_8P + 0 },
+ { { -1, -1 }, -1 * SEARCH_GRID_STRIDE_8P - 1 },
+ { { 1, -1 }, 1 * SEARCH_GRID_STRIDE_8P - 1 },
+ { { -1, 1 }, -1 * SEARCH_GRID_STRIDE_8P + 1 },
+ { { 1, 1 }, 1 * SEARCH_GRID_STRIDE_8P + 1 }
+ };
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
@@ -2030,6 +2038,10 @@
MV *best_mv = &x->best_mv.as_mv;
unsigned int best_sad = INT_MAX;
int i, j;
+ uint8_t do_refine_search_grid[SEARCH_GRID_STRIDE_8P * SEARCH_GRID_STRIDE_8P] =
+ { 0 };
+ int grid_center = SEARCH_GRID_CENTER_8P;
+ int grid_coord = grid_center;
clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
@@ -2051,13 +2063,20 @@
mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
}
+ do_refine_search_grid[grid_coord] = 1;
+
for (i = 0; i < search_range; ++i) {
int best_site = -1;
for (j = 0; j < 8; ++j) {
- const MV mv = { best_mv->row + neighbors[j].row,
- best_mv->col + neighbors[j].col };
+ grid_coord = grid_center + neighbors[j].coord_offset;
+ if (do_refine_search_grid[grid_coord] == 1) {
+ continue;
+ }
+ const MV mv = { best_mv->row + neighbors[j].coord.row,
+ best_mv->col + neighbors[j].coord.col };
+ do_refine_search_grid[grid_coord] = 1;
if (is_mv_in(&x->mv_limits, &mv)) {
unsigned int sad;
if (mask) {
@@ -2087,8 +2106,9 @@
if (best_site == -1) {
break;
} else {
- best_mv->row += neighbors[best_site].row;
- best_mv->col += neighbors[best_site].col;
+ best_mv->row += neighbors[best_site].coord.row;
+ best_mv->col += neighbors[best_site].coord.col;
+ grid_center += neighbors[best_site].coord_offset;
}
}
return best_sad;
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 7df15d6..592d89f 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -31,6 +31,11 @@
// for Block_16x16
#define BORDER_MV_PIXELS_B16 (16 + AOM_INTERP_EXTEND)
+#define SEARCH_RANGE_8P 3
+#define SEARCH_GRID_STRIDE_8P (2 * SEARCH_RANGE_8P + 1)
+#define SEARCH_GRID_CENTER_8P \
+ (SEARCH_RANGE_8P * SEARCH_GRID_STRIDE_8P + SEARCH_RANGE_8P)
+
// motion search site
typedef struct search_site {
MV mv;
@@ -43,6 +48,11 @@
int searches_per_step;
} search_site_config;
+typedef struct {
+ MV coord;
+ int coord_offset;
+} search_neighbors;
+
void av1_init_dsmotion_compensation(search_site_config *cfg, int stride);
void av1_init3smotion_compensation(search_site_config *cfg, int stride);
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 55908fb..6119431 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -6510,7 +6510,7 @@
const int have_newmv = have_nearmv_in_inter_mode(mbmi->mode);
const int ref_mv_idx = mbmi->ref_mv_idx + (have_newmv ? 1 : 0);
MV *const best_mv = &x->best_mv.as_mv;
- const int search_range = 3;
+ const int search_range = SEARCH_RANGE_8P;
const int sadpb = x->sadperbit16;
// Allow joint search multiple times iteratively for each reference frame
// and break out of the search loop if it couldn't find a better mv.
@@ -7153,7 +7153,7 @@
int bestsme = INT_MAX;
int sadpb = x->sadperbit16;
MV *const best_mv = &x->best_mv.as_mv;
- int search_range = 3;
+ int search_range = SEARCH_RANGE_8P;
MvLimits tmp_mv_limits = x->mv_limits;