Refine luma transform rd evaluation
The luma transform RD cost is refined to be precise at the block
level.
For the speed=1 preset, a 6.4% encode time reduction is seen for
10 frames of parkrun 720p50 content, and a 0.01% BD-rate
improvement is seen in AWCY tests.
STATS_CHANGED
Change-Id: Id9e496587e54ff8cbf2522c8d75a17b4fa2b3ea1
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0b5f22f..5cf61cd 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5080,7 +5080,7 @@
TXB_RD_INFO_NODE *rd_info_tree) {
MACROBLOCKD *const xd = &x->e_mbd;
int is_cost_valid = 1;
- int64_t this_rd = 0;
+ int64_t this_rd = 0, skip_rd = 0;
if (ref_best_rd < 0) is_cost_valid = 0;
@@ -5111,40 +5111,40 @@
av1_get_entropy_contexts(bsize, pd, ctxa, ctxl);
memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
+ const int skip_ctx = av1_get_skip_context(xd);
+ const int s0 = x->skip_cost[skip_ctx][0];
+ const int s1 = x->skip_cost[skip_ctx][1];
+ skip_rd = RDCOST(x->rdmult, s1, 0);
+ this_rd = RDCOST(x->rdmult, s0, 0);
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bw) {
+ int64_t best_rd_sofar = (ref_best_rd - (AOMMIN(skip_rd, this_rd)));
select_tx_block(cpi, x, idy, idx, block, max_tx_size, init_depth,
plane_bsize, ctxa, ctxl, tx_above, tx_left,
- &pn_rd_stats, ref_best_rd - this_rd, &is_cost_valid,
- ftxs_mode, rd_info_tree);
+ &pn_rd_stats, best_rd_sofar, &is_cost_valid, ftxs_mode,
+ rd_info_tree);
if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) {
av1_invalid_rd_stats(rd_stats);
return;
}
av1_merge_rd_stats(rd_stats, &pn_rd_stats);
- this_rd +=
- AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
- RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));
+ skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse);
+ this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
block += step;
if (rd_info_tree != NULL) rd_info_tree += 1;
}
}
+ if (skip_rd <= this_rd) {
+ this_rd = skip_rd;
+ rd_stats->rate = 0;
+ rd_stats->dist = rd_stats->sse;
+ rd_stats->skip = 1;
+ } else {
+ rd_stats->skip = 0;
+ }
}
- const int skip_ctx = av1_get_skip_context(xd);
- const int s0 = x->skip_cost[skip_ctx][0];
- const int s1 = x->skip_cost[skip_ctx][1];
- int64_t skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse);
- this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
- if (skip_rd <= this_rd) {
- this_rd = skip_rd;
- rd_stats->rate = 0;
- rd_stats->dist = rd_stats->sse;
- rd_stats->skip = 1;
- } else {
- rd_stats->skip = 0;
- }
if (this_rd > ref_best_rd) is_cost_valid = 0;
if (!is_cost_valid) {