Introduce reuse_compound_type_decision speed feature
Reuse the stored compound type decision when a matching entry is found in the saved compound type RD stats.
This speed feature is enabled for preset >= 3.
             Instruction Count
cpu-used         Reduction        Quality Loss
    3              0.50             0.0045%
    4              0.55            -0.0184%
STATS_CHANGED
Change-Id: Ide80d68742e011b0e0bc973e1861778bbc72d2a2
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 3a162a8..de61c56 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -193,12 +193,14 @@
int64_t dist[COMPOUND_TYPES];
int32_t model_rate[COMPOUND_TYPES];
int64_t model_dist[COMPOUND_TYPES];
+ int comp_rs2[COMPOUND_TYPES];
int_mv mv[2];
MV_REFERENCE_FRAME ref_frames[2];
PREDICTION_MODE mode;
int_interpfilters filter;
int ref_mv_idx;
int is_global[2];
+ INTERINTER_COMPOUND_DATA interinter_comp;
} COMP_RD_STATS;
// Struct for buffers used by compound_type_rd() function.
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0a82562..81536ab 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7537,11 +7537,13 @@
static INLINE void backup_stats(COMPOUND_TYPE cur_type, int32_t *comp_rate,
int64_t *comp_dist, int32_t *comp_model_rate,
int64_t *comp_model_dist, int rate_sum,
- int64_t dist_sum, RD_STATS *rd_stats) {
+ int64_t dist_sum, RD_STATS *rd_stats,
+ int *comp_rs2, int rs2) {
comp_rate[cur_type] = rd_stats->rate;
comp_dist[cur_type] = rd_stats->dist;
comp_model_rate[cur_type] = rate_sum;
comp_model_dist[cur_type] = dist_sum;
+ comp_rs2[cur_type] = rs2;
}
static int64_t masked_compound_type_rd(
@@ -7552,7 +7554,7 @@
int mode_rate, int64_t rd_thresh, int *calc_pred_masked_compound,
int32_t *comp_rate, int64_t *comp_dist, int32_t *comp_model_rate,
int64_t *comp_model_dist, const int64_t comp_best_model_rd,
- int64_t *const comp_model_rd_cur) {
+ int64_t *const comp_model_rd_cur, int *comp_rs2) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
@@ -7699,7 +7701,8 @@
RDCOST(x->rdmult, *rs2 + *out_rate_mv + rd_stats.rate, rd_stats.dist);
// Backup rate and distortion for future reuse
backup_stats(compound_type, comp_rate, comp_dist, comp_model_rate,
- comp_model_dist, rate_sum, dist_sum, &rd_stats);
+ comp_model_dist, rate_sum, dist_sum, &rd_stats, comp_rs2,
+ *rs2);
}
} else {
// Reuse data as matching record is found
@@ -8365,10 +8368,13 @@
}
// Checks if characteristics of search match
-static INLINE int is_comp_rd_match(
- const AV1_COMP *const cpi, const MACROBLOCK *const x,
- const COMP_RD_STATS *st, const MB_MODE_INFO *const mi, int32_t *comp_rate,
- int64_t *comp_dist, int32_t *comp_model_rate, int64_t *comp_model_dist) {
+static INLINE int is_comp_rd_match(const AV1_COMP *const cpi,
+ const MACROBLOCK *const x,
+ const COMP_RD_STATS *st,
+ const MB_MODE_INFO *const mi,
+ int32_t *comp_rate, int64_t *comp_dist,
+ int32_t *comp_model_rate,
+ int64_t *comp_model_dist, int *comp_rs2) {
// TODO(ranjit): Ensure that compound type search use regular filter always
// and check if following check can be removed
// Check if interp filter matches with previous case
@@ -8392,6 +8398,7 @@
comp_dist[comp_type] = st->dist[comp_type];
comp_model_rate[comp_type] = st->model_rate[comp_type];
comp_model_dist[comp_type] = st->model_dist[comp_type];
+ comp_rs2[comp_type] = st->comp_rs2[comp_type];
}
// For compound wedge/segment, reuse data only if NEWMV is not present in
@@ -8407,6 +8414,8 @@
sizeof(comp_model_rate[COMPOUND_WEDGE]) * 2);
memcpy(&comp_model_dist[COMPOUND_WEDGE], &st->model_dist[COMPOUND_WEDGE],
sizeof(comp_model_dist[COMPOUND_WEDGE]) * 2);
+ memcpy(&comp_rs2[COMPOUND_WEDGE], &st->comp_rs2[COMPOUND_WEDGE],
+ sizeof(comp_rs2[COMPOUND_WEDGE]) * 2);
}
return 1;
}
@@ -8449,10 +8458,13 @@
const MB_MODE_INFO *const mbmi,
int32_t *comp_rate, int64_t *comp_dist,
int32_t *comp_model_rate,
- int64_t *comp_model_dist) {
+ int64_t *comp_model_dist, int *comp_rs2,
+ int *match_index) {
for (int j = 0; j < x->comp_rd_stats_idx; ++j) {
if (is_comp_rd_match(cpi, x, &x->comp_rd_stats[j], mbmi, comp_rate,
- comp_dist, comp_model_rate, comp_model_dist)) {
+ comp_dist, comp_model_rate, comp_model_dist,
+ comp_rs2)) {
+ *match_index = j;
return 1;
}
}
@@ -8481,7 +8493,7 @@
static INLINE void save_comp_rd_search_stat(
MACROBLOCK *x, const MB_MODE_INFO *const mbmi, const int32_t *comp_rate,
const int64_t *comp_dist, const int32_t *comp_model_rate,
- const int64_t *comp_model_dist, const int_mv *cur_mv) {
+ const int64_t *comp_model_dist, const int_mv *cur_mv, const int *comp_rs2) {
const int offset = x->comp_rd_stats_idx;
if (offset < MAX_COMP_RD_STATS) {
COMP_RD_STATS *const rd_stats = x->comp_rd_stats + offset;
@@ -8489,6 +8501,7 @@
memcpy(rd_stats->dist, comp_dist, sizeof(rd_stats->dist));
memcpy(rd_stats->model_rate, comp_model_rate, sizeof(rd_stats->model_rate));
memcpy(rd_stats->model_dist, comp_model_dist, sizeof(rd_stats->model_dist));
+ memcpy(rd_stats->comp_rs2, comp_rs2, sizeof(rd_stats->comp_rs2));
memcpy(rd_stats->mv, cur_mv, sizeof(rd_stats->mv));
memcpy(rd_stats->ref_frames, mbmi->ref_frame, sizeof(rd_stats->ref_frames));
rd_stats->mode = mbmi->mode;
@@ -8500,6 +8513,8 @@
&xd->global_motion[mbmi->ref_frame[i]];
rd_stats->is_global[i] = is_global_mv_block(mbmi, wm->wmtype);
}
+ memcpy(&rd_stats->interinter_comp, &mbmi->interinter_comp,
+ sizeof(rd_stats->interinter_comp));
++x->comp_rd_stats_idx;
}
}
@@ -9697,6 +9712,29 @@
mbmi->compound_idx = (cur_type != COMPOUND_DISTWTD);
}
+// When match is found, populate the compound type data
+// and calculate the rd cost using the stored stats and
+// update the mbmi appropriately.
+static INLINE int populate_reuse_comp_type_data(
+ const MACROBLOCK *x, MB_MODE_INFO *mbmi,
+ BEST_COMP_TYPE_STATS *best_type_stats, int_mv *cur_mv, int32_t *comp_rate,
+ int64_t *comp_dist, int *comp_rs2, int *rate_mv, int64_t *rd,
+ int match_index) {
+ const int winner_comp_type =
+ x->comp_rd_stats[match_index].interinter_comp.type;
+ if (comp_rate[winner_comp_type] == INT_MAX)
+ return best_type_stats->best_compmode_interinter_cost;
+ update_mbmi_for_compound_type(mbmi, winner_comp_type);
+ mbmi->interinter_comp = x->comp_rd_stats[match_index].interinter_comp;
+ *rd = RDCOST(
+ x->rdmult,
+ comp_rs2[winner_comp_type] + *rate_mv + comp_rate[winner_comp_type],
+ comp_dist[winner_comp_type]);
+ mbmi->mv[0].as_int = cur_mv[0].as_int;
+ mbmi->mv[1].as_int = cur_mv[1].as_int;
+ return comp_rs2[winner_comp_type];
+}
+
// Updates rd cost and relevant compound type data for the best compound type
static INLINE void update_best_info(const MB_MODE_INFO *const mbmi, int64_t *rd,
BEST_COMP_TYPE_STATS *best_type_stats,
@@ -9822,12 +9860,15 @@
int64_t comp_dist[COMPOUND_TYPES] = { INT64_MAX, INT64_MAX, INT64_MAX,
INT64_MAX };
int32_t comp_rate[COMPOUND_TYPES] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+ int comp_rs2[COMPOUND_TYPES] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
int32_t comp_model_rate[COMPOUND_TYPES] = { INT_MAX, INT_MAX, INT_MAX,
INT_MAX };
int64_t comp_model_dist[COMPOUND_TYPES] = { INT64_MAX, INT64_MAX, INT64_MAX,
INT64_MAX };
- const int match_found = find_comp_rd_in_stats(
- cpi, x, mbmi, comp_rate, comp_dist, comp_model_rate, comp_model_dist);
+ int match_index = 0;
+ const int match_found =
+ find_comp_rd_in_stats(cpi, x, mbmi, comp_rate, comp_dist, comp_model_rate,
+ comp_model_dist, comp_rs2, &match_index);
best_mv[0].as_int = cur_mv[0].as_int;
best_mv[1].as_int = cur_mv[1].as_int;
*rd = INT64_MAX;
@@ -9861,6 +9902,13 @@
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
+ // If the match is found, calculate the rd cost using the
+ // stored stats and update the mbmi appropriately.
+ if (match_found && cpi->sf.inter_sf.reuse_compound_type_decision) {
+ return populate_reuse_comp_type_data(x, mbmi, &best_type_stats, cur_mv,
+ comp_rate, comp_dist, comp_rs2,
+ rate_mv, rd, match_index);
+ }
// Special handling if both compound_average and compound_distwtd
// are to be searched. In this case, first estimate between the two
// modes and then call estimate_yrd_for_sb() only for the better of
@@ -9928,7 +9976,7 @@
// Backup rate and distortion for future reuse
backup_stats(best_type, comp_rate, comp_dist, comp_model_rate,
comp_model_dist, est_rate[best_type], est_dist[best_type],
- &est_rd_stats);
+ &est_rd_stats, comp_rs2, rs2);
comp_model_rd_cur = est_rd;
}
if (best_type == COMPOUND_AVERAGE) restore_dst_buf(xd, *tmp_dst, 1);
@@ -9978,7 +10026,8 @@
// Backup rate and distortion for future reuse
backup_stats(cur_type, comp_rate, comp_dist, comp_model_rate,
- comp_model_dist, rate_sum, dist_sum, &est_rd_stats);
+ comp_model_dist, rate_sum, dist_sum, &est_rd_stats,
+ comp_rs2, rs2);
}
} else {
// Calculate RD cost based on stored stats
@@ -10009,7 +10058,7 @@
&tmp_rate_mv, preds0, preds1, buffers->residual1, buffers->diff10,
strides, rd_stats->rate, tmp_rd_thresh, &calc_pred_masked_compound,
comp_rate, comp_dist, comp_model_rate, comp_model_dist,
- best_type_stats.comp_best_model_rd, &comp_model_rd_cur);
+ best_type_stats.comp_best_model_rd, &comp_model_rd_cur, comp_rs2);
}
}
// Update stats for best compound type
@@ -10046,7 +10095,7 @@
restore_dst_buf(xd, *orig_dst, 1);
if (!match_found)
save_comp_rd_search_stat(x, mbmi, comp_rate, comp_dist, comp_model_rate,
- comp_model_dist, cur_mv);
+ comp_model_dist, cur_mv, comp_rs2);
return best_type_stats.best_compmode_interinter_cost;
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index fa90d88..afc7174 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -435,6 +435,7 @@
sf->inter_sf.prune_motion_mode_level = boosted ? 2 : 3;
if (cpi->oxcf.enable_smooth_interintra)
sf->inter_sf.disable_smooth_interintra = boosted ? 0 : 1;
+ sf->inter_sf.reuse_compound_type_decision = 1;
sf->intra_sf.prune_palette_search_level = 2;
@@ -941,6 +942,7 @@
inter_sf->disable_interinter_wedge = 0;
inter_sf->prune_ref_mv_idx_search = 0;
inter_sf->prune_warped_prob_thresh = 0;
+ inter_sf->reuse_compound_type_decision = 0;
}
static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 84066ff..4cf387c 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -628,6 +628,11 @@
// 0: no breakout
// 1: use model based rd breakout
int model_based_post_interp_filter_breakout;
+
+ // Reuse compound type rd decision when exact match is found
+ // 0: No reuse
+ // 1: Reuse the compound type decision
+ int reuse_compound_type_decision;
} INTER_MODE_SPEED_FEATURES;
typedef struct INTERP_FILTER_SPEED_FEATURES {