AV1 RT: Limit the number of modes with TX search
8-10% speedup on speed 6 QVGA.
1.5% BDRate loss overall but 6% on one outlier.
The feature is turned off for now.
Change-Id: Ieb82b82243e577a922ca50a06a1b2219be6170c4
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index faea6de..7ebef47 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -11882,27 +11882,30 @@
AOMMIN(x->best_pred_mv_sad, x->pred_mv_sad[ref_frame]);
}
// ref_frame = ALTREF_FRAME
- for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
- x->mbmi_ext->mode_context[ref_frame] = 0;
- mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
- const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
- if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
- (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
- continue;
- }
-
- if (mbmi->partition != PARTITION_NONE &&
- mbmi->partition != PARTITION_SPLIT) {
- if (skip_ref_frame_mask & (1 << ref_frame)) {
+ if (!cpi->sf.use_real_time_ref_set) { // No second reference on RT ref set,
+ // so no need to initialize
+ for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
+ x->mbmi_ext->mode_context[ref_frame] = 0;
+ mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
+ const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
+ if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
+ (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
continue;
}
+
+ if (mbmi->partition != PARTITION_NONE &&
+ mbmi->partition != PARTITION_SPLIT) {
+ if (skip_ref_frame_mask & (1 << ref_frame)) {
+ continue;
+ }
+ }
+ av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
+ xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
+ mi_row, mi_col, mbmi_ext->mode_context);
+ // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
+ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
+ av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
}
- av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
- xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
- mi_row, mi_col, mbmi_ext->mode_context);
- // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
- // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
- av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
}
av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
@@ -12966,7 +12969,8 @@
const int do_tx_search =
!((cpi->sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
(cpi->sf.inter_mode_rd_model_estimation == 2 &&
- num_pels_log2_lookup[bsize] > 8));
+ num_pels_log2_lookup[bsize] > 8) ||
+ cpi->sf.force_tx_search_off);
InterModesInfo *inter_modes_info = x->inter_modes_info;
inter_modes_info->num = 0;
@@ -13211,7 +13215,10 @@
inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
search_state.best_rd = best_rd_so_far;
search_state.best_mode_index = THR_INVALID;
-
+ inter_modes_info->num =
+ inter_modes_info->num < cpi->sf.num_inter_modes_for_tx_search
+ ? inter_modes_info->num
+ : cpi->sf.num_inter_modes_for_tx_search;
const int64_t top_est_rd =
inter_modes_info->num > 0
? inter_modes_info
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 4f6109b..7addf4d 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -648,6 +648,11 @@
sf->max_intra_bsize = BLOCK_16X16;
sf->use_inter_txb_hash = 0;
sf->skip_interp_filter_search = 1;
+#if 0
+ // Turning this off until we agree that the quality vs. speed tradeoff is good.
+ sf->force_tx_search_off = 1;
+ sf->num_inter_modes_for_tx_search = 2;
+#endif
}
if (speed >= 7) {
sf->lpf_pick = LPF_PICK_FROM_Q;
@@ -825,6 +830,8 @@
// TODO(yunqing): turn it on for speed 0 if there is gain.
sf->adaptive_overlay_encoding = 0;
sf->skip_interp_filter_search = 0;
+ sf->force_tx_search_off = 0;
+ sf->num_inter_modes_for_tx_search = INT_MAX;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 6b1e0bc..2478d97 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -798,6 +798,12 @@
// For nonrd: use block_yrd for rd cost in interpolation filter search.
int nonrd_use_blockyrd_interp_filter;
+
+ // Forces TX search off for RDCost calculation.
+ int force_tx_search_off;
+
+ // Number of best inter modes to search transform. INT_MAX - search all.
+ int num_inter_modes_for_tx_search;
} SPEED_FEATURES;
struct AV1_COMP;