Add speed feature to switch search pattern. Use 8-point motion estimation at speed 3 and above. At speed 5, the encoding time for bus_cif at 200 kbps changes from 314s to 281s, ~10% faster. The corresponding compression performance loss is 0.5%, which is consistent with what was observed with the speed 1 settings. STATS_CHANGED Change-Id: I784c8c41d1efd612cc7ece9a36dea9c52a70b07b
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 8875287..47b90ac 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -4054,14 +4054,9 @@ av1_init_dsmotion_compensation(&cpi->ss_cfg[SS_CFG_LOOKAHEAD], y_stride_src); } else { - // Update the offsets in search_sites as y_stride can change due to scaled - // references. This update allows NSTEP to be used on scaled references as - // long as sf.mv.search_method is not DIAMOND. Currently in the codebae, - // sf.mv.search_method is never set to DIAMOND. av1_init3smotion_compensation(&cpi->ss_cfg[SS_CFG_SRC], y_stride); av1_init3smotion_compensation(&cpi->ss_cfg[SS_CFG_LOOKAHEAD], y_stride_src); } - av1_init_motion_fpf(&cpi->ss_cfg[SS_CFG_FPF], y_stride); } @@ -4116,6 +4111,9 @@ seq_params->subsampling_y = subsampling_y; seq_params->use_highbitdepth = use_highbitdepth; + av1_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed); + av1_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed); + alloc_altref_frame_buffer(cpi); init_ref_frame_bufs(cpi); alloc_util_frame_buffers(cpi);
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c index 893ef9d..1a4fd61 100644 --- a/av1/encoder/mcomp.c +++ b/av1/encoder/mcomp.c
@@ -104,30 +104,33 @@ } void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) { - int len, ss_count = 0; + int ss_count = 0; int stage_index = MAX_MVSEARCH_STEPS - 1; cfg->ss[stage_index][0].mv.col = cfg->ss[stage_index][0].mv.row = 0; cfg->ss[stage_index][0].offset = 0; cfg->stride = stride; - for (len = MAX_FIRST_STEP; len > 0; len /= 2) { - // Generate offsets for 4 search sites per step. - const MV ss_mvs[5] = { - { 0, 0 }, { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } + for (int radius = MAX_FIRST_STEP; radius > 0; radius /= 2) { + int num_search_pts = 8; + + const MV ss_mvs[13] = { + { 0, 0 }, { -radius, 0 }, { radius, 0 }, + { 0, -radius }, { 0, radius }, { -radius, -radius }, + { radius, radius }, { -radius, radius }, { radius, -radius }, }; + int i; - for (i = 0; i < 5; ++i) { + for (i = 0; i <= num_search_pts; ++i) { search_site *const ss = &cfg->ss[stage_index][i]; ss->mv = ss_mvs[i]; ss->offset = ss->mv.row * stride + ss->mv.col; } - cfg->searches_per_step[stage_index] = 4; - cfg->radius[stage_index] = len; + cfg->searches_per_step[stage_index] = num_search_pts; + cfg->radius[stage_index] = radius; --stage_index; ++ss_count; } - cfg->ss_count = ss_count; } @@ -180,7 +183,7 @@ int stage_index = 0; cfg->stride = stride; int radius = 1; - for (stage_index = 0; stage_index < 15; ++stage_index) { + for (stage_index = 0; stage_index < 12; ++stage_index) { int tan_radius = AOMMAX((int)(0.41 * radius), 1); int num_search_pts = 12; if (radius == 1) num_search_pts = 8; @@ -208,7 +211,7 @@ cfg->searches_per_step[stage_index] = num_search_pts; cfg->radius[stage_index] = radius; ++ss_count; - radius = (int)AOMMAX((radius * 1.4 + 0.5), radius + 1); + radius = (int)AOMMAX((radius * 1.5 + 0.5), radius + 1); } cfg->ss_count = ss_count; } @@ -2336,6 +2339,7 @@ fn_ptr, 1, ref_mv); break; case NSTEP: + case DIAMOND: var = full_pixel_diamond(x, mvp_full, step_param, use_var, error_per_bit, cfg->ss_count - 1 - step_param, cost_list, fn_ptr, ref_mv, cfg, 
NULL, NULL, 0, 0);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index 06be216..9d192f9 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -422,7 +422,7 @@ // sf->mv_sf.adaptive_motion_search = 1; sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED; sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS; - + sf->mv_sf.search_method = DIAMOND; sf->inter_sf.disable_sb_level_mv_cost_upd = 1; // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine // it with cpi->sf.disable_wedge_search_var_thresh.