Introduce speed feature motion_mode_for_winner_cand
Add the speed feature motion_mode_for_winner_cand. When it is enabled,
inter mode evaluation searches only SIMPLE_TRANSLATION and stores the
winner candidates. The other (non simple translation) motion modes are
then evaluated only for these winner candidates.
This speed feature is enabled for speed>=3.
cpu-used   Instruction Count Reduction   Quality Loss
   4                 3.48%                   0.22%
   3                 2.82%                   0.20%
STATS_CHANGED
Change-Id: I8f49417c959f1c697cba88cc64371d2be1805825
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 18749e5..c0c9255 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1104,6 +1104,10 @@
int8_t nearest_past_ref;
int8_t nearest_future_ref;
+ // Indicates the number of simple translation winner modes for exhaustive
+ // motion mode evaluation
+ int num_winner_motion_modes;
+
// TODO(sdeng): consider merge the following arrays.
double *tpl_rdmult_scaling_factors;
double *tpl_sb_rdmult_scaling_factors;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 4140ac7..e1d92d4 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -9788,17 +9788,34 @@
return mv_field_check_ctxt.mv_field_check_result;
}
+// Selects the inclusive [start, end] range of motion mode indices to search.
+// By default the range runs from SIMPLE_TRANSLATION up to
+// last_motion_mode_allowed + interintra_allowed. When the
+// motion_mode_for_winner_cand speed feature is on, a call with
+// eval_motion_mode == 0 is restricted to SIMPLE_TRANSLATION only, while a call
+// with eval_motion_mode != 0 starts at index 1 so that only motion modes other
+// than simple translation are processed.
+static INLINE void update_mode_start_end_index(
+    const AV1_COMP *const cpi, int *mode_index_start, int *mode_index_end,
+    int last_motion_mode_allowed, int interintra_allowed,
+    int eval_motion_mode) {
+  const int winner_cand_enabled = cpi->sf.motion_mode_for_winner_cand != 0;
+  const int full_end = last_motion_mode_allowed + interintra_allowed;
+  const int simple_translation_only = winner_cand_enabled && !eval_motion_mode;
+  const int skip_simple_translation = winner_cand_enabled && eval_motion_mode;
+
+  *mode_index_start = skip_simple_translation ? 1 : (int)SIMPLE_TRANSLATION;
+  *mode_index_end =
+      simple_translation_only ? (int)SIMPLE_TRANSLATION : full_end;
+}
+
// TODO(afergs): Refactor the MBMI references in here - there's four
// TODO(afergs): Refactor optional args - add them to a struct or remove
-static int64_t motion_mode_rd(const AV1_COMP *const cpi, TileDataEnc *tile_data,
- MACROBLOCK *const x, BLOCK_SIZE bsize,
- RD_STATS *rd_stats, RD_STATS *rd_stats_y,
- RD_STATS *rd_stats_uv, int *disable_skip,
- HandleInterModeArgs *const args,
- int64_t ref_best_rd, const int *refs,
- int *rate_mv, const BUFFER_SET *orig_dst,
- int64_t *best_est_rd, int do_tx_search,
- InterModesInfo *inter_modes_info) {
+static int64_t motion_mode_rd(
+ const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
+ BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
+ RD_STATS *rd_stats_uv, int *disable_skip, HandleInterModeArgs *const args,
+ int64_t ref_best_rd, const int *refs, int *rate_mv,
+ const BUFFER_SET *orig_dst, int64_t *best_est_rd, int do_tx_search,
+ InterModesInfo *inter_modes_info, int eval_motion_mode) {
const AV1_COMMON *const cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *xd = &x->e_mbd;
@@ -9850,8 +9867,11 @@
: 0;
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
- for (int mode_index = (int)SIMPLE_TRANSLATION;
- mode_index <= (int)last_motion_mode_allowed + interintra_allowed;
+ int mode_index_start, mode_index_end;
+ update_mode_start_end_index(cpi, &mode_index_start, &mode_index_end,
+ last_motion_mode_allowed, interintra_allowed,
+ eval_motion_mode);
+ for (int mode_index = mode_index_start; mode_index <= mode_index_end;
mode_index++) {
if (args->skip_motion_mode && mode_index) continue;
if (cpi->sf.prune_single_motion_modes_by_simple_trans &&
@@ -9921,7 +9941,7 @@
mbmi->mv[0].as_int = x->best_mv.as_int;
tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
}
- if (mbmi->mv[0].as_int != cur_mv) {
+ if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
0, av1_num_planes(cm) - 1);
}
@@ -10862,6 +10882,19 @@
return result;
}
+// One inter mode candidate recorded during the main mode loop so that its
+// remaining motion modes can be evaluated in a later pass.
+typedef struct motion_mode_candidate {
+ MB_MODE_INFO mbmi;  // Copy of the block's mode info for this candidate
+ int rate_mv;  // MV rate handed back to motion_mode_rd() for this candidate
+ int rate2_nocoeff;  // rd_stats->rate sampled just before motion_mode_rd()
+ int skip_motion_mode;  // Copy of args.skip_motion_mode for this candidate
+ int64_t rd_cost;  // RD cost used to order the candidate list
+} motion_mode_candidate;
+
+// Fixed-capacity list of motion_mode_candidate entries. The insertion code in
+// the mode loop places each new entry in front of the first entry with a
+// larger rd_cost.
+typedef struct motion_mode_best_st_candidate {
+ // Candidate storage, sized for MAX_WINNER_MOTION_MODES entries
+ motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
+ int num_motion_mode_cand;  // Number of entries of motion_mode_cand in use
+} motion_mode_best_st_candidate;
+
static int64_t handle_inter_mode(AV1_COMP *const cpi, TileDataEnc *tile_data,
MACROBLOCK *x, BLOCK_SIZE bsize,
RD_STATS *rd_stats, RD_STATS *rd_stats_y,
@@ -10870,7 +10903,8 @@
uint8_t *const tmp_buf,
const CompoundTypeRdBuffers *rd_buffers,
int64_t *best_est_rd, const int do_tx_search,
- InterModesInfo *inter_modes_info) {
+ InterModesInfo *inter_modes_info,
+ motion_mode_candidate *motion_mode_cand) {
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *xd = &x->e_mbd;
@@ -11014,6 +11048,7 @@
if (best_mbmi.ref_mv_idx == i) {
assert(best_rd != INT64_MAX);
best_mbmi.ref_mv_idx = ref_mv_idx;
+ motion_mode_cand->rate_mv = this_rate_mv;
best_rd_stats.rate += this_cost - compare_cost;
best_rd = RDCOST(x->rdmult, best_rd_stats.rate,
best_rd_stats.dist);
@@ -11172,10 +11207,11 @@
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, motion_mode_rd_time);
#endif
+ int rate2_nocoeff = rd_stats->rate;
ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
rd_stats_uv, disable_skip, args, ref_best_rd, refs,
&rate_mv, &orig_dst, best_est_rd, do_tx_search,
- inter_modes_info);
+ inter_modes_info, 0);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, motion_mode_rd_time);
#endif
@@ -11196,6 +11232,8 @@
memcpy(best_blk_skip, x->blk_skip,
sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w);
av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->n4_h * xd->n4_w);
+ motion_mode_cand->rate_mv = rate_mv;
+ motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
}
if (tmp_rd < ref_best_rd) {
@@ -13012,6 +13050,79 @@
ref_frame_rd[frame2] <= ref_frame_rd[0];
}
+// Runs motion_mode_rd() with eval_motion_mode = 1 for every stored
+// single-reference winner candidate, and updates search_state (via
+// update_search_state()) whenever the resulting RD cost is lower than
+// search_state->best_rd.
+// For each candidate, the saved mode info, MV rate, skip_motion_mode flag and
+// no-coefficient rate are restored into *mbmi, args and rd_stats before the
+// call.
+//   best_motion_mode_cands: candidate list to evaluate (read only).
+//   yv12_mb:                per reference frame / plane buffers used to set
+//                           up xd->plane[].pre[].
+//   rd_cost, ctx:           forwarded to update_search_state().
+//   best_est_rd, do_tx_search: forwarded to motion_mode_rd().
+static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
+ const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
+ HandleInterModeArgs *const args, TileDataEnc *const tile_data,
+ PICK_MODE_CONTEXT *const ctx,
+ struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
+ const motion_mode_best_st_candidate *const best_motion_mode_cands,
+ int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
+ InterModeSearchState *const search_state) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const int num_planes = av1_num_planes(cm);
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = xd->mi[0];
+ InterModesInfo *const inter_modes_info = x->inter_modes_info;
+ const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;
+
+ // Visit the stored candidates in list order.
+ for (int cand = 0; cand < num_best_cand; cand++) {
+ RD_STATS rd_stats;
+ RD_STATS rd_stats_y;
+ RD_STATS rd_stats_uv;
+ av1_init_rd_stats(&rd_stats);
+ av1_init_rd_stats(&rd_stats_y);
+ av1_init_rd_stats(&rd_stats_uv);
+ int disable_skip = 0, rate_mv;
+
+ // Restore the state that was saved when this candidate was recorded.
+ rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
+ args->skip_motion_mode =
+ best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
+ *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
+ rd_stats.rate =
+ best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;
+
+ // Only single-reference candidates are evaluated here; skip the candidate
+ // if its prediction mode is not a single-reference inter mode.
+ // Note that *mbmi and args->skip_motion_mode have already been overwritten
+ // at this point.
+ if (!is_inter_singleref_mode(mbmi->mode)) continue;
+
+ x->skip = 0;
+ const int mode_index = get_prediction_mode_idx(
+ mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ // A negative second reference is passed to motion_mode_rd() as 0.
+ int refs[2] = { mbmi->ref_frame[0],
+ (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
+ struct macroblockd_plane *p = xd->plane;
+ // Current destination buffers and strides of planes 0..2, handed to
+ // motion_mode_rd() as orig_dst.
+ const BUFFER_SET orig_dst = {
+ { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
+ { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
+ };
+
+ set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ // Use the simple RD state stored for this prediction mode index.
+ args->simple_rd_state = x->simple_rd_state[mode_index];
+ // Initialize motion mode to simple translation
+ // Calculation of switchable rate depends on it.
+ mbmi->motion_mode = 0;
+ const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
+ // Point the prediction source planes at the candidate's reference frame
+ // buffers.
+ for (int i = 0; i < num_planes; i++) {
+ xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
+ if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
+ }
+
+ // eval_motion_mode = 1 (last argument).
+ int64_t ret_value = motion_mode_rd(
+ cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
+ &disable_skip, args, search_state->best_rd, refs, &rate_mv, &orig_dst,
+ best_est_rd, do_tx_search, inter_modes_info, 1);
+
+ // Candidates for which motion_mode_rd() returned INT64_MAX are ignored.
+ if (ret_value != INT64_MAX) {
+ rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
+ if (rd_stats.rdcost < search_state->best_rd) {
+ // NOTE(review): the mode index is recomputed from mbmi after
+ // motion_mode_rd(), presumably because that call may modify mbmi -
+ // confirm.
+ const THR_MODES mode_enum = get_prediction_mode_idx(
+ mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
+ update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
+ &rd_stats_uv, mode_enum, x, do_tx_search);
+ }
+ }
+ }
+}
+
void av1_rd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
MACROBLOCK *x, RD_STATS *rd_cost,
const BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
@@ -13049,6 +13160,14 @@
NULL,
{ { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
0 };
+ const int max_winner_motion_mode_cand = cpi->num_winner_motion_modes;
+ motion_mode_candidate motion_mode_cand;
+ motion_mode_best_st_candidate best_motion_mode_cands;
+ // Initializing the number of motion mode candidates to zero.
+ best_motion_mode_cands.num_motion_mode_cand = 0;
+ for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
+ best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
+
for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
av1_invalid_rd_stats(rd_cost);
@@ -13267,7 +13386,7 @@
int64_t this_rd = handle_inter_mode(
cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
&disable_skip, &args, ref_best_rd, tmp_buf, &x->comp_rd_buffer,
- &best_est_rd, do_tx_search, inter_modes_info);
+ &best_est_rd, do_tx_search, inter_modes_info, &motion_mode_cand);
if (sf->prune_comp_search_by_single_result > 0 &&
is_inter_singleref_mode(this_mode) && args.single_ref_first_pass) {
@@ -13294,6 +13413,41 @@
update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
&rd_stats_uv, mode_enum, x, do_tx_search);
}
+ if (cpi->sf.motion_mode_for_winner_cand) {
+ const int num_motion_mode_cand =
+ best_motion_mode_cands.num_motion_mode_cand;
+ int valid_motion_mode_cand_loc =
+ (num_motion_mode_cand > 0) ? INT32_MAX : 0;
+
+ // find the best location to insert new motion mode candidate
+ for (int j = 0; j < num_motion_mode_cand; j++) {
+ if (this_rd < best_motion_mode_cands.motion_mode_cand[j].rd_cost) {
+ valid_motion_mode_cand_loc = j;
+ break;
+ }
+ }
+
+ if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
+ if (num_motion_mode_cand > 0 &&
+ valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
+ memmove(
+ &best_motion_mode_cands
+ .motion_mode_cand[valid_motion_mode_cand_loc + 1],
+ &best_motion_mode_cands
+ .motion_mode_cand[valid_motion_mode_cand_loc],
+ (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
+ valid_motion_mode_cand_loc) *
+ sizeof(best_motion_mode_cands.motion_mode_cand[0]));
+ motion_mode_cand.mbmi = *mbmi;
+ motion_mode_cand.rd_cost = this_rd;
+ motion_mode_cand.skip_motion_mode = args.skip_motion_mode;
+ best_motion_mode_cands.motion_mode_cand[valid_motion_mode_cand_loc] =
+ motion_mode_cand;
+ best_motion_mode_cands.num_motion_mode_cand =
+ AOMMIN(max_winner_motion_mode_cand,
+ best_motion_mode_cands.num_motion_mode_cand + 1);
+ }
+ }
/* keep record of best compound/single-only prediction */
if (!disable_skip) {
@@ -13328,6 +13482,15 @@
if (x->skip && !comp_pred) break;
}
+ if (cpi->sf.motion_mode_for_winner_cand) {
+ // For the single ref winner candidates, evaluate other motion modes (non
+ // simple translation).
+ evaluate_motion_mode_for_winner_candidates(
+ cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
+ &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
+ &search_state);
+ }
+
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, do_tx_search_time);
#endif
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 3bef55d..add4e41 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -31,6 +31,7 @@
#define INTER_INTRA_RD_THRESH_SHIFT 4
#define COMP_TYPE_RD_THRESH_SCALE 11
#define COMP_TYPE_RD_THRESH_SHIFT 4
+#define MAX_WINNER_MOTION_MODES 10  // Capacity of the winner candidate list
struct TileInfo;
struct macroblock;
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 4979c95..6596384 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -68,6 +68,9 @@
{ 1, 2, 0 },
{ 2, 2, 0 } };
+// Number of winner simple translation modes to keep for the later motion mode
+// evaluation, indexed by sf->motion_mode_for_winner_cand (0: feature off).
+// The table is only ever read, so it is declared const.
+static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
+
// Threshold values to be used for disabling coeff RD-optimization
// based on block MSE
// TODO(any): Experiment the threshold logic based on variance metric
@@ -407,6 +410,11 @@
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->simple_motion_search_prune_agg = 1;
sf->disable_sb_level_mv_cost_upd = 1;
+ sf->motion_mode_for_winner_cand =
+ boosted
+ ? 0
+ : gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE ? 1
+ : 2;
sf->tx_type_search.use_skip_flag_prediction =
cm->allow_screen_content_tools ? 1 : 2;
}
@@ -857,6 +865,7 @@
sf->adaptive_overlay_encoding = 1;
sf->skip_interp_filter_search = 0;
sf->force_tx_search_off = 0;
+ sf->motion_mode_for_winner_cand = 0;
sf->num_inter_modes_for_tx_search = INT_MAX;
for (i = 0; i < TX_SIZES; i++) {
@@ -1028,6 +1037,11 @@
tx_domain_dist_types[cpi->sf.tx_domain_dist_level],
sizeof(cpi->use_transform_domain_distortion));
+ // Update the number of winner motion modes to be used appropriately
+ cpi->num_winner_motion_modes =
+ num_winner_motion_modes[cpi->sf.motion_mode_for_winner_cand];
+ assert(cpi->num_winner_motion_modes <= MAX_WINNER_MOTION_MODES);
+
// assert ensures that coeff_opt_dist_thresholds is accessed correctly
assert(cpi->sf.perform_coeff_opt >= 0 && cpi->sf.perform_coeff_opt < 5);
memcpy(cpi->coeff_opt_dist_threshold,
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index d4e9070..eca84e9 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -831,6 +831,11 @@
// Disable interinter_wedge
int disable_interinter_wedge;
+
+ // Motion mode for winner candidates:
+ // 0: speed feature OFF
+ // 1 / 2 : Use configured number of winner candidates
+ int motion_mode_for_winner_cand;
} SPEED_FEATURES;
struct AV1_COMP;
diff --git a/build/cmake/aom_configure.cmake b/build/cmake/aom_configure.cmake
index a89336b..6705c9f 100644
--- a/build/cmake/aom_configure.cmake
+++ b/build/cmake/aom_configure.cmake
@@ -291,7 +291,7 @@
add_c_flag_if_supported("-Wstack-usage=170000")
add_cxx_flag_if_supported("-Wstack-usage=270000")
elseif(CONFIG_RD_DEBUG) # Another case where higher stack usage is expected.
- add_c_flag_if_supported("-Wstack-usage=111000")
+ add_c_flag_if_supported("-Wstack-usage=117000")
add_cxx_flag_if_supported("-Wstack-usage=240000")
else()
add_c_flag_if_supported("-Wstack-usage=100000")