Optimize mode estimation function in tpl
When motion search is limited to full-pel precision
in the tpl module, the following are now avoided:
- Full-pel MV error calculation in the sub-pel search module
- Copy of inter prediction data
Instruction count reduction:
  cpu | Instruction Count Reduction (%)
   5  | 0.517
   6  | 0.687
Change-Id: I24db8258176e56191bac27ea39b13e6382b995d0
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index c28b6e9..436ede6 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -283,9 +283,16 @@
av1_set_mv_search_method(&full_ms_params, search_site_cfg,
tpl_sf->search_method);
- av1_full_pixel_search(start_mv, &full_ms_params, step_param,
- cond_cost_list(cpi, cost_list), &best_mv->as_fullmv,
- NULL);
+ bestsme = av1_full_pixel_search(start_mv, &full_ms_params, step_param,
+ cond_cost_list(cpi, cost_list),
+ &best_mv->as_fullmv, NULL);
+
+ // When sub-pel motion search is skipped, populate sub-pel precision MV and
+ // return.
+ if (tpl_sf->subpel_force_stop == FULL_PEL) {
+ best_mv->as_mv = get_mv_from_fullmv(&best_mv->as_fullmv);
+ return bestsme;
+ }
SUBPEL_MOTION_SEARCH_PARAMS ms_params;
av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &center_mv,
@@ -444,6 +451,7 @@
TplDepStats *tpl_stats) {
AV1_COMMON *cm = &cpi->common;
const GF_GROUP *gf_group = &cpi->ppi->gf_group;
+ TPL_SPEED_FEATURES *tpl_sf = &cpi->sf.tpl_sf;
(void)gf_group;
@@ -472,7 +480,7 @@
mi_row * MI_SIZE * tpl_frame->rec_picture->y_stride + mi_col * MI_SIZE;
uint8_t *dst_buffer = tpl_frame->rec_picture->y_buffer + dst_mb_offset;
int dst_buffer_stride = tpl_frame->rec_picture->y_stride;
- int use_y_only_rate_distortion = cpi->sf.tpl_sf.use_y_only_rate_distortion;
+ int use_y_only_rate_distortion = tpl_sf->use_y_only_rate_distortion;
uint8_t *rec_buffer_pool[3] = {
tpl_frame->rec_picture->y_buffer,
@@ -551,7 +559,7 @@
// if cpi->sf.tpl_sf.prune_intra_modes is on, then search only DC_PRED,
// H_PRED, and V_PRED
const PREDICTION_MODE last_intra_mode =
- cpi->sf.tpl_sf.prune_intra_modes ? D45_PRED : INTRA_MODE_END;
+ tpl_sf->prune_intra_modes ? D45_PRED : INTRA_MODE_END;
const SequenceHeader *seq_params = cm->seq_params;
for (PREDICTION_MODE mode = INTRA_MODE_START; mode < last_intra_mode;
++mode) {
@@ -657,7 +665,7 @@
TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos(
mi_row - mi_height, mi_col, tpl_frame->stride, block_mis_log2)];
if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count,
- cpi->sf.tpl_sf.skip_alike_starting_mv)) {
+ tpl_sf->skip_alike_starting_mv)) {
center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int;
++refmv_count;
}
@@ -667,7 +675,7 @@
TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos(
mi_row, mi_col - mi_width, tpl_frame->stride, block_mis_log2)];
if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count,
- cpi->sf.tpl_sf.skip_alike_starting_mv)) {
+ tpl_sf->skip_alike_starting_mv)) {
center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int;
++refmv_count;
}
@@ -678,7 +686,7 @@
mi_row - mi_height, mi_col + mi_width, tpl_frame->stride,
block_mis_log2)];
if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count,
- cpi->sf.tpl_sf.skip_alike_starting_mv)) {
+ tpl_sf->skip_alike_starting_mv)) {
center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int;
++refmv_count;
}
@@ -697,13 +705,13 @@
rf_idx + LAST_FRAME);
if (tp_mv.as_int != INVALID_MV &&
!is_alike_mv(tp_mv, center_mvs + 1, refmv_count - 1,
- cpi->sf.tpl_sf.skip_alike_starting_mv)) {
+ tpl_sf->skip_alike_starting_mv)) {
center_mvs[0].mv = tp_mv;
}
}
// Prune starting mvs
- if (cpi->sf.tpl_sf.prune_starting_mv) {
+ if (tpl_sf->prune_starting_mv) {
// Get each center mv's sad.
for (idx = 0; idx < refmv_count; ++idx) {
FULLPEL_MV mv = get_fullmv_from_mv(&center_mvs[idx].mv.as_mv);
@@ -717,7 +725,7 @@
if (refmv_count > 1) {
qsort(center_mvs, refmv_count, sizeof(center_mvs[0]), compare_sad);
}
- refmv_count = AOMMIN(4 - cpi->sf.tpl_sf.prune_starting_mv, refmv_count);
+ refmv_count = AOMMIN(4 - tpl_sf->prune_starting_mv, refmv_count);
// Further reduce number of refmv based on sad difference.
if (refmv_count > 1) {
int last_sad = center_mvs[refmv_count - 1].sad;
@@ -742,21 +750,31 @@
tpl_stats->mv[rf_idx].as_int = best_rfidx_mv.as_int;
single_mv[rf_idx] = best_rfidx_mv;
- struct buf_2d ref_buf = { NULL, ref_frame_ptr->y_buffer,
- ref_frame_ptr->y_width, ref_frame_ptr->y_height,
- ref_frame_ptr->y_stride };
- InterPredParams inter_pred_params;
- av1_init_inter_params(&inter_pred_params, bw, bh, mi_row * MI_SIZE,
- mi_col * MI_SIZE, 0, 0, xd->bd, is_cur_buf_hbd(xd), 0,
- &tpl_data->sf, &ref_buf, kernel);
- inter_pred_params.conv_params = get_conv_params(0, 0, xd->bd);
+ if (tpl_sf->subpel_force_stop != FULL_PEL) {
+ struct buf_2d ref_buf = { NULL, ref_frame_ptr->y_buffer,
+ ref_frame_ptr->y_width, ref_frame_ptr->y_height,
+ ref_frame_ptr->y_stride };
+ InterPredParams inter_pred_params;
+ av1_init_inter_params(&inter_pred_params, bw, bh, mi_row * MI_SIZE,
+ mi_col * MI_SIZE, 0, 0, xd->bd, is_cur_buf_hbd(xd),
+ 0, &tpl_data->sf, &ref_buf, kernel);
+ inter_pred_params.conv_params = get_conv_params(0, 0, xd->bd);
- av1_enc_build_one_inter_predictor(predictor, bw, &best_rfidx_mv.as_mv,
- &inter_pred_params);
+ av1_enc_build_one_inter_predictor(predictor, bw, &best_rfidx_mv.as_mv,
+ &inter_pred_params);
- inter_cost =
- tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride,
- predictor, bw, coeff, bw, bh, tx_size);
+ inter_cost =
+ tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride,
+ predictor, bw, coeff, bw, bh, tx_size);
+ } else {
+ const FULLPEL_MV best_fullmv = get_fullmv_from_mv(&best_rfidx_mv.as_mv);
+ // Since sub-pel motion search is not performed, use the prediction pixels
+ // directly from the reference block ref_mb
+ inter_cost = tpl_get_satd_cost(
+ bd_info, src_diff, bw, src_mb_buffer, src_stride,
+ &ref_mb[best_fullmv.row * ref_stride + best_fullmv.col], ref_stride,
+ coeff, bw, bh, tx_size);
+ }
// Store inter cost for each ref frame
tpl_stats->pred_error[rf_idx] = AOMMAX(1, inter_cost);
@@ -783,7 +801,7 @@
int start_rf = 0;
int end_rf = 3;
- if (!cpi->sf.tpl_sf.allow_compound_pred) end_rf = 0;
+ if (!tpl_sf->allow_compound_pred) end_rf = 0;
if (cpi->third_pass_ctx &&
frame_offset < cpi->third_pass_ctx->frame_info_count &&
tpl_data->frame_idx < gf_group->size) {
@@ -803,10 +821,10 @@
break;
}
}
- if (!found || !cpi->sf.tpl_sf.allow_compound_pred) {
+ if (!found || !tpl_sf->allow_compound_pred) {
comp_ref_frames[2][0] = this_mi->ref_frame[0] - LAST_FRAME;
comp_ref_frames[2][1] = this_mi->ref_frame[1] - LAST_FRAME;
- if (!cpi->sf.tpl_sf.allow_compound_pred) {
+ if (!tpl_sf->allow_compound_pred) {
start_rf = 2;
end_rf = 3;
}