Correct skip_rdy computation for txfm_rd_gate_level sf
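This CL fixes the skip_rdy computation for the txfm_rd_gate_level speed
feature by scaling the luma SSE (left shift by 4) before it is passed to
RDCOST. The gating thresholds in check_txfm_eval and the per-speed
txfm_rd_gate_level settings are adjusted to match the new scaling.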
STATS_CHANGED
Change-Id: I16c3f62754abf9cf4938bc1f2919f17559407d33
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index 83ecdee..fd0f61c 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -1041,7 +1041,7 @@
*rs2 += get_interinter_compound_mask_rate(x, mbmi);
best_rd_cur += RDCOST(x->rdmult, *rs2 + rate_mv, 0);
assert(cur_sse != UINT64_MAX);
- int64_t skip_rd_cur = RDCOST(x->rdmult, *rs2 + rate_mv, cur_sse);
+ int64_t skip_rd_cur = RDCOST(x->rdmult, *rs2 + rate_mv, (cur_sse << 4));
// Although the true rate_mv might be different after motion search, but it
// is unlikely to be the best mode considering the transform rd cost and other
@@ -1329,7 +1329,8 @@
int eval_txfm = 1;
// Check if the mode is good enough based on skip rd
if (cpi->sf.inter_sf.txfm_rd_gate_level) {
- int64_t skip_rd = RDCOST(x->rdmult, rs2 + *rate_mv, sse_y[best_type]);
+ int64_t skip_rd =
+ RDCOST(x->rdmult, rs2 + *rate_mv, (sse_y[best_type] << 4));
eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd, skip_rd,
cpi->sf.inter_sf.txfm_rd_gate_level, 1);
}
@@ -1387,7 +1388,7 @@
// Check if the mode is good enough based on skip rd
if (cpi->sf.inter_sf.txfm_rd_gate_level) {
int64_t sse_y = compute_sse_plane(x, xd, PLANE_TYPE_Y, bsize);
- int64_t skip_rd = RDCOST(x->rdmult, rs2 + *rate_mv, sse_y);
+ int64_t skip_rd = RDCOST(x->rdmult, rs2 + *rate_mv, (sse_y << 4));
eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd, skip_rd,
cpi->sf.inter_sf.txfm_rd_gate_level, 1);
}
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 41de894..cbd3339 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1502,8 +1502,9 @@
rd_stats->rdcost = est_rd;
if (rd_stats->rdcost < *best_est_rd) {
*best_est_rd = rd_stats->rdcost;
+ assert(sse_y >= 0);
ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
- ? RDCOST(x->rdmult, mode_rate, sse_y)
+ ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
: INT64_MAX;
}
if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
@@ -1531,7 +1532,7 @@
// model_rd_sb_fn and compound type rd
sse_y = ROUND_POWER_OF_TWO(sse_y, (xd->bd - 8) * 2);
skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
- skip_rdy = RDCOST(x->rdmult, rd_stats->rate, sse_y);
+ skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
cpi->sf.inter_sf.txfm_rd_gate_level, 0);
if (!eval_txfm) continue;
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index 4d0b05e..341ba0d 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -334,24 +334,31 @@
// Derive aggressiveness factor for gating the transform search
// Lower value indicates more aggressiveness. Be more conservative (high
// value) for (i) low quantizers (ii) regions where prediction is poor
- const int scale[4] = { INT_MAX, 3, 3, 2 };
-
- int aggr_factor =
- AOMMAX(1, ((MAXQ - x->qindex) * 2 + QINDEX_RANGE / 2) >> QINDEX_BITS);
+ const int scale[5] = { INT_MAX, 4, 3, 3, 2 };
+ const int qslope = 2 * (!is_luma_only);
+ int aggr_factor = 1;
+ if (!is_luma_only) {
+ aggr_factor = AOMMAX(
+ 1, ((MAXQ - x->qindex) * qslope + QINDEX_RANGE / 2) >> QINDEX_BITS);
+ }
if (best_skip_rd >
(x->source_variance << (num_pels_log2_lookup[bsize] + RDDIV_BITS)))
aggr_factor *= scale[level];
+ // For level setting 1, be more conservative for the non-luma-only case even
+ // when prediction is good
+ else if ((level <= 1) && !is_luma_only)
+ aggr_factor *= 2;
// Be more conservative for luma only cases (called from compound type rd)
// since best_skip_rd is computed after and skip_rd is computed (with 8-bit
// prediction signals blended for WEDGE/DIFFWTD rather than 16-bit) before
// interpolation filter search
- const int luma_mul[4] = { INT_MAX, 16, 15, 11 };
- int mul_factor = is_luma_only ? luma_mul[level] : 8;
+ const int luma_mul[5] = { INT_MAX, 32, 29, 20, 17 };
+ int mul_factor = is_luma_only ? luma_mul[level] : 16;
int64_t rd_thresh =
(best_skip_rd == INT64_MAX)
? best_skip_rd
- : (int64_t)(best_skip_rd * aggr_factor * mul_factor >> 3);
+ : (int64_t)(best_skip_rd * aggr_factor * mul_factor >> 4);
if (skip_rd > rd_thresh) eval_txfm = 0;
return eval_txfm;
}
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 6035876..5117f52 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -464,7 +464,9 @@
sf->inter_sf.disable_smooth_interintra = boosted ? 0 : 1;
sf->inter_sf.reuse_compound_type_decision = 1;
sf->inter_sf.txfm_rd_gate_level =
- (is_boosted_arf2_bwd_type || cm->allow_screen_content_tools) ? 0 : 1;
+ (boosted || cm->allow_screen_content_tools)
+ ? 0
+ : (is_boosted_arf2_bwd_type ? 1 : 2);
sf->intra_sf.prune_palette_search_level = 2;
@@ -510,7 +512,7 @@
sf->inter_sf.alt_ref_search_fp = 1;
sf->inter_sf.prune_ref_mv_idx_search = 1;
sf->inter_sf.txfm_rd_gate_level =
- (boosted || cm->allow_screen_content_tools) ? 0 : 2;
+ (boosted || cm->allow_screen_content_tools) ? 0 : 3;
sf->inter_sf.disable_smooth_interintra = 1;
@@ -577,7 +579,7 @@
sf->inter_sf.disable_obmc = 1;
sf->inter_sf.disable_onesided_comp = 1;
sf->inter_sf.txfm_rd_gate_level =
- (boosted || cm->allow_screen_content_tools) ? 0 : 3;
+ (boosted || cm->allow_screen_content_tools) ? 0 : 4;
sf->inter_sf.prune_inter_modes_if_skippable = 1;
sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;