Extend downsample SAD computation for lowres, midres
Add a new mode (use_downsampled_sad = 1) that enables downsampled SAD
during motion search only when, at the start MV, the SAD of the even
rows and the SAD of the odd rows deviate little from each other.
      Instruction Count        BD-Rate Loss(%)
cpu     Reduction(%)     avg.psnr   ovr.psnr    ssim
 5         1.748         -0.0222    -0.0224    -0.0220
 6         1.379          0.0284     0.0274     0.0632
STATS_CHANGED for good preset, speed 5 and 6
Change-Id: I65cfd6d85f648ac4b2a55e04b5fe209ca2cd6d86
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index befdb50..c0d658b 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -280,7 +280,7 @@
cpi->is_screen_content_type && cpi->common.features.allow_intrabc;
FULLPEL_MOTION_SEARCH_PARAMS ms_params;
av1_make_default_fullpel_ms_params(&ms_params, cpi, x, bsize, ref_mv,
- first_pass_search_sites,
+ start_mv, first_pass_search_sites,
fine_search_interval);
av1_set_mv_search_method(&ms_params, first_pass_search_sites, NSTEP);
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index b8d78b9..b316616 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -94,10 +94,12 @@
void av1_make_default_fullpel_ms_params(
FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const struct AV1_COMP *cpi,
- MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv,
+ MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv, FULLPEL_MV start_mv,
const search_site_config search_sites[NUM_DISTINCT_SEARCH_METHODS],
int fine_search_interval) {
const MV_SPEED_FEATURES *mv_sf = &cpi->sf.mv_sf;
+ const int is_key_frame =
+ cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == KF_UPDATE;
// High level params
ms_params->bsize = bsize;
@@ -129,19 +131,6 @@
av1_set_mv_search_method(ms_params, search_sites, search_method);
- const int use_downsampled_sad =
- mv_sf->use_downsampled_sad && block_size_high[bsize] >= 16;
- if (use_downsampled_sad) {
- ms_params->sdf = ms_params->vfp->sdsf;
- ms_params->sdx4df = ms_params->vfp->sdsx4df;
- // Skip version of sadx3 is not is not available yet
- ms_params->sdx3df = ms_params->vfp->sdsx4df;
- } else {
- ms_params->sdf = ms_params->vfp->sdf;
- ms_params->sdx4df = ms_params->vfp->sdx4df;
- ms_params->sdx3df = ms_params->vfp->sdx3df;
- }
-
ms_params->mesh_patterns[0] = mv_sf->mesh_patterns;
ms_params->mesh_patterns[1] = mv_sf->intrabc_mesh_patterns;
ms_params->force_mesh_thresh = mv_sf->exhaustive_searches_thresh;
@@ -161,6 +150,47 @@
// Mvcost params
init_mv_cost_params(&ms_params->mv_cost_params, x->mv_costs, ref_mv,
x->errorperbit, x->sadperbit);
+
+ ms_params->sdf = ms_params->vfp->sdf;
+ ms_params->sdx4df = ms_params->vfp->sdx4df;
+ ms_params->sdx3df = ms_params->vfp->sdx3df;
+
+ if (mv_sf->use_downsampled_sad == 2 && block_size_high[bsize] >= 16) {
+ ms_params->sdf = ms_params->vfp->sdsf;
+ ms_params->sdx4df = ms_params->vfp->sdsx4df;
+ // Skip version of sadx3 is not available yet
+ ms_params->sdx3df = ms_params->vfp->sdsx4df;
+ } else if (mv_sf->use_downsampled_sad == 1 && block_size_high[bsize] >= 16 &&
+ !is_key_frame) {
+ FULLPEL_MV start_mv_clamped = start_mv;
+ // adjust start_mv to make sure it is within MV range
+ clamp_fullmv(&start_mv_clamped, &ms_params->mv_limits);
+
+ const struct buf_2d *const ref = ms_params->ms_buffers.ref;
+ const int ref_stride = ref->stride;
+ const uint8_t *best_address = get_buf_from_fullmv(ref, &start_mv_clamped);
+ const struct buf_2d *const src = ms_params->ms_buffers.src;
+ const uint8_t *src_buf = src->buf;
+ const int src_stride = src->stride;
+
+ unsigned int start_mv_sad_even_rows, start_mv_sad_odd_rows;
+ start_mv_sad_even_rows =
+ ms_params->vfp->sdsf(src_buf, src_stride, best_address, ref_stride);
+ start_mv_sad_odd_rows =
+ ms_params->vfp->sdsf(src_buf + src_stride, src_stride,
+ best_address + ref_stride, ref_stride);
+
+ // If the absolute difference between the pred-to-src SADs of the even and
+ // odd rows is small, skip every other row in SAD computation.
+ const int odd_to_even_diff_sad =
+ abs((int)start_mv_sad_even_rows - (int)start_mv_sad_odd_rows);
+ const int mult_thresh = 4;
+ if (odd_to_even_diff_sad * mult_thresh < (int)start_mv_sad_even_rows) {
+ ms_params->sdf = ms_params->vfp->sdsf;
+ ms_params->sdx4df = ms_params->vfp->sdsx4df;
+ ms_params->sdx3df = ms_params->vfp->sdsx4df;
+ }
+ }
}
void av1_set_ms_to_intra_mode(FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index c4468cf..51a4777 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -144,7 +144,7 @@
void av1_make_default_fullpel_ms_params(
FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const struct AV1_COMP *cpi,
- MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv,
+ MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv, FULLPEL_MV start_mv,
const search_site_config search_sites[NUM_DISTINCT_SEARCH_METHODS],
int fine_search_interval);
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index da6729c..b771b05 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -265,8 +265,6 @@
// Allow more mesh searches for screen content type on the ARF.
const int fine_search_interval = use_fine_search_interval(cpi);
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
- av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
- src_search_site_cfg, fine_search_interval);
switch (mbmi->motion_mode) {
case SIMPLE_TRANSLATION: {
@@ -278,7 +276,11 @@
if (smv.as_int == INVALID_MV) continue;
- int thissme =
+ av1_make_default_fullpel_ms_params(
+ &full_ms_params, cpi, x, bsize, &ref_mv, smv.as_fullmv,
+ src_search_site_cfg, fine_search_interval);
+
+ const int thissme =
av1_full_pixel_search(smv.as_fullmv, &full_ms_params, step_param,
cond_cost_list(cpi, cost_list), &this_best_mv,
&this_second_best_mv);
@@ -294,6 +296,10 @@
}
} break;
case OBMC_CAUSAL:
+ av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
+ &ref_mv, start_mv, src_search_site_cfg,
+ fine_search_interval);
+
bestsme = av1_obmc_full_pixel_search(start_mv, &full_ms_params,
step_param, &best_mv->as_fullmv);
break;
@@ -618,16 +624,16 @@
const SEARCH_METHODS search_method = cpi->sf.mv_sf.search_method;
const search_site_config *src_search_sites =
av1_get_search_site_config(cpi, x, search_method);
+ // Use the mv result from the single mode as mv predictor.
+ const FULLPEL_MV start_fullmv = get_fullmv_from_mv(&cur_mv[id].as_mv);
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
- &ref_mv[id].as_mv, src_search_sites,
+ &ref_mv[id].as_mv, start_fullmv,
+ src_search_sites,
/*fine_search_interval=*/0);
av1_set_ms_compound_refs(&full_ms_params.ms_buffers, second_pred, mask,
mask_stride, id);
- // Use the mv result from the single mode as mv predictor.
- const FULLPEL_MV start_fullmv = get_fullmv_from_mv(&cur_mv[id].as_mv);
-
// Small-range full-pixel motion search.
if (!cpi->sf.mv_sf.disable_extensive_joint_motion_search &&
mbmi->interinter_comp.type != COMPOUND_WEDGE) {
@@ -772,16 +778,16 @@
const SEARCH_METHODS search_method = cpi->sf.mv_sf.search_method;
const search_site_config *src_search_sites =
av1_get_search_site_config(cpi, x, search_method);
+ // Use the mv result from the single mode as mv predictor.
+ const FULLPEL_MV start_fullmv = get_fullmv_from_mv(this_mv);
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
- &ref_mv.as_mv, src_search_sites,
+ &ref_mv.as_mv, start_fullmv,
+ src_search_sites,
/*fine_search_interval=*/0);
av1_set_ms_compound_refs(&full_ms_params.ms_buffers, second_pred, mask,
mask_stride, ref_idx);
- // Use the mv result from the single mode as mv predictor.
- const FULLPEL_MV start_fullmv = get_fullmv_from_mv(this_mv);
-
// Small-range full-pixel motion search.
bestsme = av1_full_pixel_search(start_fullmv, &full_ms_params, 5, NULL,
&best_mv.as_fullmv, NULL);
@@ -999,7 +1005,8 @@
const search_site_config *src_search_sites =
av1_get_search_site_config(cpi, x, search_method);
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
- src_search_sites, fine_search_interval);
+ start_mv, src_search_sites,
+ fine_search_interval);
var = av1_full_pixel_search(start_mv, &full_ms_params, step_param,
cond_cost_list(cpi, cost_list),
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index 7754cd6..05bcf1d 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -238,7 +238,7 @@
av1_get_search_site_config(cpi, x, search_method);
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &center_mv,
- src_search_sites,
+ start_mv, src_search_sites,
/*fine_search_interval=*/0);
const unsigned int full_var_rd = av1_full_pixel_search(
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 7ce91a9..9fc255d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3189,8 +3189,10 @@
FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
const search_site_config *lookahead_search_sites =
cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
+ const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
- &dv_ref.as_mv, lookahead_search_sites,
+ &dv_ref.as_mv, start_mv,
+ lookahead_search_sites,
/*fine_search_interval=*/0);
const IntraBCMVCosts *const dv_costs = x->dv_costs;
av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
@@ -3237,7 +3239,6 @@
}
const int step_param = cpi->mv_search_params.mv_step_param;
- const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
int_mv best_mv, best_hash_mv;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index c3b00ff..594ecb7 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -206,7 +206,7 @@
if (is_720p_or_larger) {
// TODO(chiyotsai@google.com): make this speed feature adaptive based on
// current block's vertical texture instead of hardcoded with resolution
- sf->mv_sf.use_downsampled_sad = 1;
+ sf->mv_sf.use_downsampled_sad = 2;
}
if (speed >= 1) {
@@ -587,6 +587,13 @@
const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
const int is_4k_or_larger = AOMMIN(cm->width, cm->height) >= 2160;
const bool use_hbd = cpi->oxcf.use_highbitdepth;
+ // Speed features applicable for temporal filtering and tpl modules may be
+ // changed based on frame type at places where the sf is applied (Example :
+ // use_downsampled_sad). This is because temporal filtering and tpl modules
+ // are called before this function (except for the first key frame).
+ // TODO(deepa.kg@ittiam.com): For the speed features applicable to temporal
+ // filtering and tpl modules, modify the sf initialization appropriately
+ // before calling the modules.
const int boosted = frame_is_boosted(cpi);
const int is_boosted_arf2_bwd_type =
boosted ||
@@ -625,7 +632,7 @@
if (is_720p_or_larger) {
// TODO(chiyotsai@google.com): make this speed feature adaptive based on
// current block's vertical texture instead of hardcoded with resolution
- sf->mv_sf.use_downsampled_sad = 1;
+ sf->mv_sf.use_downsampled_sad = 2;
}
if (!is_720p_or_larger) {
@@ -817,6 +824,7 @@
sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET;
sf->inter_sf.prune_nearest_near_mv_using_refmv_weight =
(boosted || allow_screen_content_tools) ? 0 : 1;
+ sf->mv_sf.use_downsampled_sad = 1;
}
if (!is_480p_or_larger) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index ae9b99e..910e191 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -808,7 +808,16 @@
int full_pixel_search_level;
// Whether to downsample the rows in sad calculation during motion search.
- // This is only active when there are at least 16 rows.
+ // This is only active when there are at least 16 rows. When this sf is
+ // active, if there is a large discrepancy in the SAD values for the final
+ // motion vector between skipping vs not skipping, motion search is redone
+ // with skip row features off.
+ // 0: Disabled (do not downsample rows)
+ // 1: Skip SAD calculation of odd rows if the SAD deviation of the even and
+ // odd rows for the starting MV is small. Redo motion search with sf off
+ // when SAD deviation is high for the final motion vector.
+ // 2: Skip SAD calculation of odd rows. SAD deviation is not tested for the
+ // start MV and tested only for the final MV.
int use_downsampled_sad;
// Enable/disable extensive joint motion search.
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 050c701..76897e8 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -146,7 +146,7 @@
const int q = av1_get_q(cpi);
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb, block_size,
- &baseline_mv, search_site_cfg,
+ &baseline_mv, start_mv, search_site_cfg,
/*fine_search_interval=*/0);
av1_set_mv_search_method(&full_ms_params, search_site_cfg, search_method);
full_ms_params.run_mesh_search = 1;
@@ -205,7 +205,7 @@
mbd->plane[0].pre[0].buf = ref_frame->y_buffer + y_offset + offset;
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, mb,
subblock_size, &baseline_mv,
- search_site_cfg,
+ start_mv, search_site_cfg,
/*fine_search_interval=*/0);
av1_set_mv_search_method(&full_ms_params, search_site_cfg,
search_method);
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index 55a2b27..c28b6e9 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -278,7 +278,7 @@
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &center_mv,
- search_site_cfg,
+ start_mv, search_site_cfg,
/*fine_search_interval=*/0);
av1_set_mv_search_method(&full_ms_params, search_site_cfg,
tpl_sf->search_method);