Bring back sdx4df for diamond search
Observed ~2% total encoding time reduction for two-pass
encoding of 720p at speed-4.
BUG=aomedia:2552
Change-Id: Ief1fb83e0c0470abddc57130a9f5f8081b6f6037
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 5ac02a8..451457b 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -1753,30 +1753,62 @@
const search_site *ss = cfg->ss[step];
best_site = 0;
- // TODO(jingning): Bring back sdx4df optimization for speed later.
- for (int idx = 1; idx <= cfg->searches_per_step[step]; ++idx) {
- // Trap illegal vectors
- const MV this_mv = { best_mv->row + ss[idx].mv.row,
- best_mv->col + ss[idx].mv.col };
+ int all_in = 1, j;
+ // Trap illegal vectors
+ all_in &= best_mv->row + ss[1].mv.row >= x->mv_limits.row_min;
+ all_in &= best_mv->row + ss[2].mv.row <= x->mv_limits.row_max;
+ all_in &= best_mv->col + ss[3].mv.col >= x->mv_limits.col_min;
+ all_in &= best_mv->col + ss[4].mv.col <= x->mv_limits.col_max;
- if (is_mv_in(&x->mv_limits, &this_mv)) {
- const uint8_t *const check_here = ss[idx].offset + best_address;
- unsigned int thissad;
+ // TODO(anyone): Implement 4 points search for msdf&sdaf
+ if (all_in && !mask && !second_pred) {
+ for (int idx = 1; idx <= cfg->searches_per_step[step]; idx += 4) {
+ unsigned char const *block_offset[4];
+ unsigned int sads[4];
- if (mask)
- thissad = fn_ptr->msdf(what, what_stride, check_here, in_what_stride,
- second_pred, mask, mask_stride, inv_mask);
- else if (second_pred)
- thissad = fn_ptr->sdaf(what, what_stride, check_here, in_what_stride,
- second_pred);
- else
- thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+ for (j = 0; j < 4; j++)
+ block_offset[j] = ss[idx + j].offset + best_address;
- if (thissad < bestsad) {
- thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+ fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sads);
+ for (j = 0; j < 4; j++) {
+ if (sads[j] < bestsad) {
+ const MV this_mv = { best_mv->row + ss[idx + j].mv.row,
+ best_mv->col + ss[idx + j].mv.col };
+ unsigned int thissad =
+ sads[j] + mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = idx + j;
+ }
+ }
+ }
+ }
+ } else {
+ for (int idx = 1; idx <= cfg->searches_per_step[step]; idx++) {
+ const MV this_mv = { best_mv->row + ss[idx].mv.row,
+ best_mv->col + ss[idx].mv.col };
+
+ if (is_mv_in(&x->mv_limits, &this_mv)) {
+ const uint8_t *const check_here = ss[idx].offset + best_address;
+ unsigned int thissad;
+
+ if (mask)
+ thissad =
+ fn_ptr->msdf(what, what_stride, check_here, in_what_stride,
+ second_pred, mask, mask_stride, inv_mask);
+ else if (second_pred)
+ thissad = fn_ptr->sdaf(what, what_stride, check_here,
+ in_what_stride, second_pred);
+ else
+ thissad =
+ fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+
if (thissad < bestsad) {
- bestsad = thissad;
- best_site = idx;
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ best_site = idx;
+ }
}
}
}