Add search site init for all motion search methods
Add search site initialization for all search methods.
Remove duplicate code by using pattern_search() for the FAST_BIGDIA
search method. This patch also enables the use of 4-point SAD for
motion search methods >= HEX.
Change-Id: I4744e9a745ba85d642b40a45aaddcc70179210c2
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 9767338..8dff9e7 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -55,6 +55,19 @@
ms_buffers->obmc_mask = x->obmc_buffer.mask;
}
+// Maps each search method to the method whose search site candidates it
+// shares; such methods differ only in the number of search steps.
+const SEARCH_METHODS search_method_lookup[NUM_SEARCH_METHODS] = {
+ DIAMOND, // DIAMOND
+ NSTEP, // NSTEP
+ HEX, // HEX
+ BIGDIA, // BIGDIA
+ SQUARE, // SQUARE
+ HEX, // FAST_HEX
+ BIGDIA, // FAST_DIAMOND
+ BIGDIA // FAST_BIGDIA
+};
+
static AOM_INLINE SEARCH_METHODS
get_faster_search_method(SEARCH_METHODS search_method) {
// Note on search method's accuracy:
@@ -71,16 +84,16 @@
case HEX: return FAST_HEX;
case FAST_HEX: return FAST_HEX;
case FAST_DIAMOND: return FAST_DIAMOND;
+ case FAST_BIGDIA: return FAST_BIGDIA;
default: assert(0 && "Invalid search method!"); return DIAMOND;
}
}
-void av1_make_default_fullpel_ms_params(FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
- const struct AV1_COMP *cpi,
- const MACROBLOCK *x, BLOCK_SIZE bsize,
- const MV *ref_mv,
- const search_site_config *search_sites,
- int fine_search_interval) {
+void av1_make_default_fullpel_ms_params(
+ FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const struct AV1_COMP *cpi,
+ const MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv,
+ const search_site_config search_sites[NUM_SEARCH_METHODS],
+ int fine_search_interval, SEARCH_METHODS search_method) {
const MV_SPEED_FEATURES *mv_sf = &cpi->sf.mv_sf;
// High level params
@@ -89,7 +102,7 @@
init_ms_buffers(&ms_params->ms_buffers, x);
- ms_params->search_method = mv_sf->search_method;
+ ms_params->search_method = search_method;
if (mv_sf->use_bsize_dependent_search_method) {
const int min_dim = AOMMIN(block_size_wide[bsize], block_size_high[bsize]);
if (min_dim >= 32) {
@@ -97,7 +110,8 @@
get_faster_search_method(ms_params->search_method);
}
}
- ms_params->search_sites = search_sites;
+ ms_params->search_sites =
+ &search_sites[search_method_lookup[ms_params->search_method]];
ms_params->mesh_patterns[0] = mv_sf->mesh_patterns;
ms_params->mesh_patterns[1] = mv_sf->intrabc_mesh_patterns;
@@ -378,7 +392,8 @@
cfg->num_search_steps = num_search_steps;
}
-void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
+// Search site initialization for NSTEP search method.
+void av1_init_motion_compensation_nstep(search_site_config *cfg, int stride) {
int num_search_steps = 0;
int stage_index = 0;
cfg->stride = stride;
@@ -420,6 +435,8 @@
cfg->num_search_steps = num_search_steps;
}
+// Search site initialization for BIGDIA / FAST_BIGDIA / FAST_DIAMOND
+// search methods.
void av1_init_motion_compensation_bigdia(search_site_config *cfg, int stride) {
cfg->stride = stride;
// First scale has 4-closest points, the rest have 8 points in diamond
@@ -458,14 +475,117 @@
};
/* clang-format on */
+ int radius = 1;
for (int i = 0; i < MAX_PATTERN_SCALES; ++i) {
cfg->searches_per_step[i] = bigdia_num_candidates[i];
+ cfg->radius[i] = radius;
for (int j = 0; j < MAX_PATTERN_CANDIDATES; ++j) {
search_site *const site = &cfg->site[i][j];
site->mv = site_candidates[i][j];
site->offset = get_offset_from_fullmv(&site->mv, stride);
}
+ radius *= 2;
}
+ cfg->num_search_steps = MAX_PATTERN_SCALES;
+}
+
+// Search site initialization for SQUARE search method.
+void av1_init_motion_compensation_square(search_site_config *cfg, int stride) {
+ cfg->stride = stride;
+ // All scales have 8 closest points in square shape.
+ static const int square_num_candidates[MAX_PATTERN_SCALES] = {
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ };
+
+ // Square search method candidates.
+ // Note that the largest candidate step at each scale is 2^scale.
+ /* clang-format off */
+ static const FULLPEL_MV
+ square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
+ { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
+ { -1, 1 }, { -1, 0 } },
+ { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
+ { -2, 2 }, { -2, 0 } },
+ { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
+ { -4, 4 }, { -4, 0 } },
+ { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
+ { -8, 8 }, { -8, 0 } },
+ { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
+ { 0, 16 }, { -16, 16 }, { -16, 0 } },
+ { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
+ { 0, 32 }, { -32, 32 }, { -32, 0 } },
+ { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
+ { 0, 64 }, { -64, 64 }, { -64, 0 } },
+ { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 },
+ { 128, 128 }, { 0, 128 }, { -128, 128 }, { -128, 0 } },
+ { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 },
+ { 256, 256 }, { 0, 256 }, { -256, 256 }, { -256, 0 } },
+ { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 },
+ { 512, 512 }, { 0, 512 }, { -512, 512 }, { -512, 0 } },
+ { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
+ { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
+ };
+
+ /* clang-format on */
+ int radius = 1;
+ for (int i = 0; i < MAX_PATTERN_SCALES; ++i) {
+ cfg->searches_per_step[i] = square_num_candidates[i];
+ cfg->radius[i] = radius;
+ for (int j = 0; j < MAX_PATTERN_CANDIDATES; ++j) {
+ search_site *const site = &cfg->site[i][j];
+ site->mv = square_candidates[i][j];
+ site->offset = get_offset_from_fullmv(&site->mv, stride);
+ }
+ radius *= 2;
+ }
+ cfg->num_search_steps = MAX_PATTERN_SCALES;
+}
+
+// Search site initialization for HEX / FAST_HEX search methods.
+void av1_init_motion_compensation_hex(search_site_config *cfg, int stride) {
+ cfg->stride = stride;
+ // First scale has 8-closest points, the rest have 6 points in hex shape
+ // at increasing scales.
+ static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6 };
+ // Note that the largest candidate step at each scale is 2^scale.
+ /* clang-format off */
+ static const FULLPEL_MV
+ hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
+ { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
+ { -1, 1 }, { -1, 0 } },
+ { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
+ { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
+ { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
+ { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 },
+ { -8, 16 }, { -16, 0 } },
+ { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
+ { -32, 0 } },
+ { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
+ { -64, 0 } },
+ { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 },
+ { -64, 128 }, { -128, 0 } },
+ { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 },
+ { -128, 256 }, { -256, 0 } },
+ { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 },
+ { -256, 512 }, { -512, 0 } },
+ { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
+ { -512, 1024 }, { -1024, 0 } },
+ };
+
+ /* clang-format on */
+ int radius = 1;
+ for (int i = 0; i < MAX_PATTERN_SCALES; ++i) {
+ cfg->searches_per_step[i] = hex_num_candidates[i];
+ cfg->radius[i] = radius;
+ for (int j = 0; j < hex_num_candidates[i]; ++j) {
+ search_site *const site = &cfg->site[i][j];
+ site->mv = hex_candidates[i][j];
+ site->offset = get_offset_from_fullmv(&site->mv, stride);
+ }
+ radius *= 2;
+ }
+ cfg->num_search_steps = MAX_PATTERN_SCALES;
}
// Checks whether the mv is within range of the mv_limits
@@ -697,7 +817,7 @@
const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv,
FULLPEL_MV *temp_best_mv, unsigned int *bestsad, unsigned int *raw_bestsad,
- int search_step, int *best_site, int num_candidates) {
+ int search_step, int *best_site, int cand_start) {
const struct buf_2d *const src = ms_params->ms_buffers.src;
const struct buf_2d *const ref = ms_params->ms_buffers.ref;
const search_site *site = ms_params->search_sites->site[search_step];
@@ -710,21 +830,21 @@
const int src_stride = src->stride;
best_address = get_buf_from_fullmv(ref, temp_best_mv);
// Loop over number of candidates.
- for (int i = 0; i < num_candidates; i += 4) {
- for (int j = 0; j < 4; j++)
- block_offset[j] = site[i + j].offset + best_address;
+ for (int j = 0; j < 4; j++)
+ block_offset[j] = site[cand_start + j].offset + best_address;
- // 4-point sad calcuation.
- vfp->sdx4df(src_buf, src_stride, block_offset, ref->stride, sads);
+ // 4-point sad calculation.
+ vfp->sdx4df(src_buf, src_stride, block_offset, ref->stride, sads);
- for (int j = 0; j < 4; j++) {
- const FULLPEL_MV this_mv = { temp_best_mv->row + site[i + j].mv.row,
- temp_best_mv->col + site[i + j].mv.col };
- const int found_better_mv = update_mvs_and_sad(
- sads[j], &this_mv, mv_cost_params, bestsad, raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) *best_site = i + j;
- }
+ for (int j = 0; j < 4; j++) {
+ const FULLPEL_MV this_mv = {
+ temp_best_mv->row + site[cand_start + j].mv.row,
+ temp_best_mv->col + site[cand_start + j].mv.col
+ };
+ const int found_better_mv = update_mvs_and_sad(
+ sads[j], &this_mv, mv_cost_params, bestsad, raw_bestsad, best_mv,
+ /*second_best_mv=*/NULL);
+ if (found_better_mv) *best_site = cand_start + j;
}
}
@@ -734,12 +854,12 @@
const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv,
FULLPEL_MV *temp_best_mv, unsigned int *bestsad, unsigned int *raw_bestsad,
- int search_step, int *best_site, const int num_candidates) {
+ int search_step, int *best_site, const int num_candidates, int cand_start) {
const struct buf_2d *const src = ms_params->ms_buffers.src;
const struct buf_2d *const ref = ms_params->ms_buffers.ref;
const search_site *site = ms_params->search_sites->site[search_step];
// Loop over number of candidates.
- for (int i = 0; i < num_candidates; i++) {
+ for (int i = cand_start; i < cand_start + num_candidates; i++) {
const FULLPEL_MV this_mv = { temp_best_mv->row + site[i].mv.row,
temp_best_mv->col + site[i].mv.col };
if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) continue;
@@ -752,276 +872,14 @@
}
}
-static int bigdia_search_level1(FULLPEL_MV start_mv,
- const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
- const int search_step, const int do_init_search,
- int *cost_list, FULLPEL_MV *best_mv) {
- static const int search_steps[MAX_MVSEARCH_STEPS] = {
- 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
- };
- int i, s, t;
- const struct buf_2d *const src = ms_params->ms_buffers.src;
- const struct buf_2d *const ref = ms_params->ms_buffers.ref;
- const search_site_config *search_sites = ms_params->search_sites;
- const int *num_candidates = search_sites->searches_per_step;
- const int ref_stride = ref->stride;
- const int last_is_4 = num_candidates[0] == 4;
- int br, bc;
- unsigned int bestsad = UINT_MAX, raw_bestsad = UINT_MAX;
- int thissad;
- int k = -1;
- const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params;
- int best_init_s = search_steps[search_step];
- // adjust ref_mv to make sure it is within MV range
- clamp_fullmv(&start_mv, &ms_params->mv_limits);
- br = start_mv.row;
- bc = start_mv.col;
- if (cost_list != NULL) {
- cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
- INT_MAX;
- }
- int costlist_has_sad = 0;
-
- // Work out the start point for the search
- raw_bestsad = get_mvpred_sad(ms_params, src,
- get_buf_from_fullmv(ref, &start_mv), ref_stride);
- bestsad = raw_bestsad + mvsad_err_cost_(&start_mv, mv_cost_params);
- // Search all possible scales up to the search param around the center point
- // pick the scale of the point that is best as the starting scale of
- // further steps around it.
- if (do_init_search) {
- s = best_init_s;
- best_init_s = -1;
- for (t = 0; t <= s; ++t) {
- int best_site = -1;
- FULLPEL_MV temp_best_mv;
- temp_best_mv.row = br;
- temp_best_mv.col = bc;
- if (check_bounds(&ms_params->mv_limits, br, bc, 1 << t)) {
- calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv,
- &temp_best_mv, &bestsad, &raw_bestsad, t,
- &best_site, num_candidates[t]);
- } else {
- calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv,
- &temp_best_mv, &bestsad, &raw_bestsad, t,
- &best_site, num_candidates[t]);
- }
- if (best_site == -1) {
- continue;
- } else {
- best_init_s = t;
- k = best_site;
- }
- }
- if (best_init_s != -1) {
- br += search_sites->site[best_init_s][k].mv.row;
- bc += search_sites->site[best_init_s][k].mv.col;
- }
- }
-
- // If the center point is still the best, just skip this and move to
- // the refinement step.
- if (best_init_s != -1) {
- const int last_s = (last_is_4 && cost_list != NULL);
- int best_site = -1;
- s = best_init_s;
- for (; s >= last_s; s--) {
- // No need to search all points the 1st time if initial search was used
- if (!do_init_search || s != best_init_s) {
- FULLPEL_MV temp_best_mv;
- temp_best_mv.row = br;
- temp_best_mv.col = bc;
- if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) {
- calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv,
- &temp_best_mv, &bestsad, &raw_bestsad, s,
- &best_site, num_candidates[s]);
- } else {
- calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv,
- &temp_best_mv, &bestsad, &raw_bestsad, s,
- &best_site, num_candidates[s]);
- }
-
- if (best_site == -1) {
- continue;
- } else {
- br += search_sites->site[s][best_site].mv.row;
- bc += search_sites->site[s][best_site].mv.col;
- k = best_site;
- }
- }
-
- do {
- int next_chkpts_indices[PATTERN_CANDIDATES_REF];
- best_site = -1;
- next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
- next_chkpts_indices[1] = k;
- next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
-
- if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) {
- for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- const FULLPEL_MV this_mv = {
- br + search_sites->site[s][next_chkpts_indices[i]].mv.row,
- bc + search_sites->site[s][next_chkpts_indices[i]].mv.col
- };
- thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
- }
- } else {
- for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- const FULLPEL_MV this_mv = {
- br + search_sites->site[s][next_chkpts_indices[i]].mv.row,
- bc + search_sites->site[s][next_chkpts_indices[i]].mv.col
- };
- if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv))
- continue;
- thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
- }
- }
-
- if (best_site != -1) {
- k = next_chkpts_indices[best_site];
- br += search_sites->site[s][k].mv.row;
- bc += search_sites->site[s][k].mv.col;
- }
- } while (best_site != -1);
- }
-
- // Note: If we enter the if below, then cost_list must be non-NULL.
- if (s == 0) {
- cost_list[0] = raw_bestsad;
- costlist_has_sad = 1;
- if (!do_init_search || s != best_init_s) {
- if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) {
- for (i = 0; i < num_candidates[s]; i++) {
- const FULLPEL_MV this_mv = { br + search_sites->site[s][i].mv.row,
- bc + search_sites->site[s][i].mv.col };
- cost_list[i + 1] = thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
- }
- } else {
- for (i = 0; i < num_candidates[s]; i++) {
- const FULLPEL_MV this_mv = { br + search_sites->site[s][i].mv.row,
- bc + search_sites->site[s][i].mv.col };
- if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv))
- continue;
- cost_list[i + 1] = thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
- }
- }
-
- if (best_site != -1) {
- br += search_sites->site[s][best_site].mv.row;
- bc += search_sites->site[s][best_site].mv.col;
- k = best_site;
- }
- }
- while (best_site != -1) {
- int next_chkpts_indices[PATTERN_CANDIDATES_REF];
- best_site = -1;
- next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
- next_chkpts_indices[1] = k;
- next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
- cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
- cost_list[((k + 2) % 4) + 1] = cost_list[0];
- cost_list[0] = raw_bestsad;
-
- if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) {
- for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- const FULLPEL_MV this_mv = {
- br + search_sites->site[s][next_chkpts_indices[i]].mv.row,
- bc + search_sites->site[s][next_chkpts_indices[i]].mv.col
- };
- cost_list[next_chkpts_indices[i] + 1] = thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
- }
- } else {
- for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
- const FULLPEL_MV this_mv = {
- br + search_sites->site[s][next_chkpts_indices[i]].mv.row,
- bc + search_sites->site[s][next_chkpts_indices[i]].mv.col
- };
- if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) {
- cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
- continue;
- }
- cost_list[next_chkpts_indices[i] + 1] = thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
- }
- }
-
- if (best_site != -1) {
- k = next_chkpts_indices[best_site];
- br += search_sites->site[s][k].mv.row;
- bc += search_sites->site[s][k].mv.col;
- }
- }
- }
- }
-
- best_mv->row = br;
- best_mv->col = bc;
-
- // Returns the one-away integer pel cost/sad around the best as follows:
- // cost_list[0]: cost/sad at the best integer pel
- // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel
- // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
- // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel
- // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel
- if (cost_list) {
- if (USE_SAD_COSTLIST) {
- calc_int_sad_list(*best_mv, ms_params, cost_list, costlist_has_sad);
- } else {
- calc_int_cost_list(*best_mv, ms_params, cost_list);
- }
- }
- best_mv->row = br;
- best_mv->col = bc;
-
- const int var_cost = get_mvpred_var_cost(ms_params, best_mv);
- return var_cost;
-}
-
// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of
// candidates as indicated in the num_candidates and candidates arrays
// passed into this function
-static int pattern_search(
- FULLPEL_MV start_mv, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
- int search_step, const int do_init_search,
- const int num_candidates[MAX_PATTERN_SCALES],
- const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES],
- int *cost_list, FULLPEL_MV *best_mv) {
+static int pattern_search(FULLPEL_MV start_mv,
+ const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
+ int search_step, const int do_init_search,
+ int *cost_list, FULLPEL_MV *best_mv) {
static const int search_steps[MAX_MVSEARCH_STEPS] = {
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
};
@@ -1029,6 +887,8 @@
const struct buf_2d *const src = ms_params->ms_buffers.src;
const struct buf_2d *const ref = ms_params->ms_buffers.ref;
+ const search_site_config *search_sites = ms_params->search_sites;
+ const int *num_candidates = search_sites->searches_per_step;
const int ref_stride = ref->stride;
const int last_is_4 = num_candidates[0] == 4;
int br, bc;
@@ -1062,31 +922,27 @@
best_init_s = -1;
for (t = 0; t <= s; ++t) {
int best_site = -1;
+ FULLPEL_MV temp_best_mv;
+ temp_best_mv.row = br;
+ temp_best_mv.col = bc;
if (check_bounds(&ms_params->mv_limits, br, bc, 1 << t)) {
- for (i = 0; i < num_candidates[t]; i++) {
- const FULLPEL_MV this_mv = { br + candidates[t][i].row,
- bc + candidates[t][i].col };
- thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
+ // Call 4-point sad for multiples of 4 candidates.
+ const int no_of_4_cand_loops = num_candidates[t] >> 2;
+ for (i = 0; i < no_of_4_cand_loops; i++) {
+ calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv,
+ &temp_best_mv, &bestsad, &raw_bestsad, t,
+ &best_site, i * 4);
}
+ // Rest of the candidates
+ const int remaining_cand = num_candidates[t] % 4;
+ calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv,
+ &temp_best_mv, &bestsad, &raw_bestsad, t,
+ &best_site, remaining_cand,
+ no_of_4_cand_loops * 4);
} else {
- for (i = 0; i < num_candidates[t]; i++) {
- const FULLPEL_MV this_mv = { br + candidates[t][i].row,
- bc + candidates[t][i].col };
- if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) continue;
- thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
- }
+ calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv,
+ &temp_best_mv, &bestsad, &raw_bestsad, t,
+ &best_site, num_candidates[t], 0);
}
if (best_site == -1) {
continue;
@@ -1096,8 +952,8 @@
}
}
if (best_init_s != -1) {
- br += candidates[best_init_s][k].row;
- bc += candidates[best_init_s][k].col;
+ br += search_sites->site[best_init_s][k].mv.row;
+ bc += search_sites->site[best_init_s][k].mv.col;
}
}
@@ -1111,39 +967,34 @@
for (; s >= last_s; s--) {
// No need to search all points the 1st time if initial search was used
if (!do_init_search || s != best_init_s) {
+ FULLPEL_MV temp_best_mv;
+ temp_best_mv.row = br;
+ temp_best_mv.col = bc;
if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) {
- for (i = 0; i < num_candidates[s]; i++) {
- const FULLPEL_MV this_mv = { br + candidates[s][i].row,
- bc + candidates[s][i].col };
- thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
+ // Call 4-point sad for multiples of 4 candidates.
+ const int no_of_4_cand_loops = num_candidates[s] >> 2;
+ for (i = 0; i < no_of_4_cand_loops; i++) {
+ calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv,
+ &temp_best_mv, &bestsad, &raw_bestsad, s,
+ &best_site, i * 4);
}
+ // Rest of the candidates
+ const int remaining_cand = num_candidates[s] % 4;
+ calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv,
+ &temp_best_mv, &bestsad, &raw_bestsad, s,
+ &best_site, remaining_cand,
+ no_of_4_cand_loops * 4);
} else {
- for (i = 0; i < num_candidates[s]; i++) {
- const FULLPEL_MV this_mv = { br + candidates[s][i].row,
- bc + candidates[s][i].col };
- if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv))
- continue;
- thissad = get_mvpred_sad(
- ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
- const int found_better_mv =
- update_mvs_and_sad(thissad, &this_mv, mv_cost_params, &bestsad,
- &raw_bestsad, best_mv,
- /*second_best_mv=*/NULL);
- if (found_better_mv) best_site = i;
- }
+ calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv,
+ &temp_best_mv, &bestsad, &raw_bestsad, s,
+ &best_site, num_candidates[s], 0);
}
if (best_site == -1) {
continue;
} else {
- br += candidates[s][best_site].row;
- bc += candidates[s][best_site].col;
+ br += search_sites->site[s][best_site].mv.row;
+ bc += search_sites->site[s][best_site].mv.col;
k = best_site;
}
}
@@ -1158,8 +1009,8 @@
if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
const FULLPEL_MV this_mv = {
- br + candidates[s][next_chkpts_indices[i]].row,
- bc + candidates[s][next_chkpts_indices[i]].col
+ br + search_sites->site[s][next_chkpts_indices[i]].mv.row,
+ bc + search_sites->site[s][next_chkpts_indices[i]].mv.col
};
thissad = get_mvpred_sad(
ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
@@ -1172,8 +1023,8 @@
} else {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
const FULLPEL_MV this_mv = {
- br + candidates[s][next_chkpts_indices[i]].row,
- bc + candidates[s][next_chkpts_indices[i]].col
+ br + search_sites->site[s][next_chkpts_indices[i]].mv.row,
+ bc + search_sites->site[s][next_chkpts_indices[i]].mv.col
};
if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv))
continue;
@@ -1189,8 +1040,8 @@
if (best_site != -1) {
k = next_chkpts_indices[best_site];
- br += candidates[s][k].row;
- bc += candidates[s][k].col;
+ br += search_sites->site[s][k].mv.row;
+ bc += search_sites->site[s][k].mv.col;
}
} while (best_site != -1);
}
@@ -1202,8 +1053,8 @@
if (!do_init_search || s != best_init_s) {
if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) {
for (i = 0; i < num_candidates[s]; i++) {
- const FULLPEL_MV this_mv = { br + candidates[s][i].row,
- bc + candidates[s][i].col };
+ const FULLPEL_MV this_mv = { br + search_sites->site[s][i].mv.row,
+ bc + search_sites->site[s][i].mv.col };
cost_list[i + 1] = thissad = get_mvpred_sad(
ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
const int found_better_mv =
@@ -1214,8 +1065,8 @@
}
} else {
for (i = 0; i < num_candidates[s]; i++) {
- const FULLPEL_MV this_mv = { br + candidates[s][i].row,
- bc + candidates[s][i].col };
+ const FULLPEL_MV this_mv = { br + search_sites->site[s][i].mv.row,
+ bc + search_sites->site[s][i].mv.col };
if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv))
continue;
cost_list[i + 1] = thissad = get_mvpred_sad(
@@ -1229,8 +1080,8 @@
}
if (best_site != -1) {
- br += candidates[s][best_site].row;
- bc += candidates[s][best_site].col;
+ br += search_sites->site[s][best_site].mv.row;
+ bc += search_sites->site[s][best_site].mv.col;
k = best_site;
}
}
@@ -1247,8 +1098,8 @@
if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
const FULLPEL_MV this_mv = {
- br + candidates[s][next_chkpts_indices[i]].row,
- bc + candidates[s][next_chkpts_indices[i]].col
+ br + search_sites->site[s][next_chkpts_indices[i]].mv.row,
+ bc + search_sites->site[s][next_chkpts_indices[i]].mv.col
};
cost_list[next_chkpts_indices[i] + 1] = thissad = get_mvpred_sad(
ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
@@ -1261,8 +1112,8 @@
} else {
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
const FULLPEL_MV this_mv = {
- br + candidates[s][next_chkpts_indices[i]].row,
- bc + candidates[s][next_chkpts_indices[i]].col
+ br + search_sites->site[s][next_chkpts_indices[i]].mv.row,
+ bc + search_sites->site[s][next_chkpts_indices[i]].mv.col
};
if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) {
cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
@@ -1280,8 +1131,8 @@
if (best_site != -1) {
k = next_chkpts_indices[best_site];
- br += candidates[s][k].row;
- bc += candidates[s][k].col;
+ br += search_sites->site[s][k].mv.row;
+ bc += search_sites->site[s][k].mv.col;
}
}
}
@@ -1325,118 +1176,24 @@
const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
const int search_step, const int do_init_search,
int *cost_list, FULLPEL_MV *best_mv) {
- // First scale has 8-closest points, the rest have 6 points in hex shape
- // at increasing scales
- static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6 };
- // Note that the largest candidate step at each scale is 2^scale
- /* clang-format off */
- static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
- { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
- { -1, 0 } },
- { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
- { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
- { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
- { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
- { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
- { -32, 0 } },
- { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
- { -64, 0 } },
- { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
- { -128, 0 } },
- { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
- { -256, 0 } },
- { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
- { -512, 0 } },
- { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
- { -512, 1024 }, { -1024, 0 } },
- };
- /* clang-format on */
return pattern_search(start_mv, ms_params, search_step, do_init_search,
- hex_num_candidates, hex_candidates, cost_list, best_mv);
+ cost_list, best_mv);
}
static int bigdia_search(const FULLPEL_MV start_mv,
const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
const int search_step, const int do_init_search,
int *cost_list, FULLPEL_MV *best_mv) {
- // First scale has 4-closest points, the rest have 8 points in diamond
- // shape at increasing scales
- static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
- 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- };
- // Note that the largest candidate step at each scale is 2^scale
- /* clang-format off */
- static const MV
- bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
- { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
- { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
- { -1, 1 }, { -2, 0 } },
- { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
- { -2, 2 }, { -4, 0 } },
- { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
- { -4, 4 }, { -8, 0 } },
- { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
- { -8, 8 }, { -16, 0 } },
- { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
- { 0, 32 }, { -16, 16 }, { -32, 0 } },
- { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
- { 0, 64 }, { -32, 32 }, { -64, 0 } },
- { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
- { 0, 128 }, { -64, 64 }, { -128, 0 } },
- { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
- { 0, 256 }, { -128, 128 }, { -256, 0 } },
- { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
- { 0, 512 }, { -256, 256 }, { -512, 0 } },
- { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
- { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
- };
-
- /* clang-format on */
return pattern_search(start_mv, ms_params, search_step, do_init_search,
- bigdia_num_candidates, bigdia_candidates, cost_list,
- best_mv);
+ cost_list, best_mv);
}
static int square_search(const FULLPEL_MV start_mv,
const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
const int search_step, const int do_init_search,
int *cost_list, FULLPEL_MV *best_mv) {
- // All scales have 8 closest points in square shape
- static const int square_num_candidates[MAX_PATTERN_SCALES] = {
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- };
- // Note that the largest candidate step at each scale is 2^scale
- /* clang-format off */
- static const MV
- square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
- { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
- { -1, 1 }, { -1, 0 } },
- { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
- { -2, 2 }, { -2, 0 } },
- { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
- { -4, 4 }, { -4, 0 } },
- { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
- { -8, 8 }, { -8, 0 } },
- { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
- { 0, 16 }, { -16, 16 }, { -16, 0 } },
- { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
- { 0, 32 }, { -32, 32 }, { -32, 0 } },
- { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
- { 0, 64 }, { -64, 64 }, { -64, 0 } },
- { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
- { 0, 128 }, { -128, 128 }, { -128, 0 } },
- { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
- { 0, 256 }, { -256, 256 }, { -256, 0 } },
- { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
- { 0, 512 }, { -512, 512 }, { -512, 0 } },
- { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
- { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
- };
- /* clang-format on */
return pattern_search(start_mv, ms_params, search_step, do_init_search,
- square_num_candidates, square_candidates, cost_list,
- best_mv);
+ cost_list, best_mv);
}
static int fast_hex_search(const FULLPEL_MV start_mv,
@@ -1461,9 +1218,9 @@
const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
const int search_step, const int do_init_search,
int *cost_list, FULLPEL_MV *best_mv) {
- return bigdia_search_level1(start_mv, ms_params,
- AOMMAX(MAX_MVSEARCH_STEPS - 3, search_step),
- do_init_search, cost_list, best_mv);
+ return bigdia_search(start_mv, ms_params,
+ AOMMAX(MAX_MVSEARCH_STEPS - 3, search_step),
+ do_init_search, cost_list, best_mv);
}
static int diamond_search_sad(FULLPEL_MV start_mv,