Populate rd_stats appropriately
Populate rd_stats appropriately for correct non-skip cost calculation.
For the speed 1, 2, 3 and 4 presets, a BD-rate improvement is seen: the
impact is -0.09%, -0.07%, -0.03% and -0.01% respectively (as per AWCY runs).
The corresponding encode time reduction is 0.0%, -1.2% (i.e., an increase in
encode time), 0.2% and 0.3% (averaged across multiple test cases).
STATS_CHANGED
Change-Id: I14087831d04f03f0eea7482914809e42db957489
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 442cc5f..93c8c25 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -5127,8 +5127,6 @@
}
if (skip_rd <= this_rd) {
- rd_stats->rate = 0;
- rd_stats->dist = rd_stats->sse;
rd_stats->skip = 1;
#if CONFIG_ONE_PASS_SVM
av1_reg_stat_skipmode_update(rd_stats, x->rdmult);
@@ -5775,10 +5773,33 @@
mbmi->tx_size = tx_size;
for (int i = 0; i < n4; ++i) set_blk_skip(x, 0, i, 1);
rd_stats->skip = 1;
- rd_stats->rate = 0;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
dist = ROUND_POWER_OF_TWO(dist, (xd->bd - 8) * 2);
rd_stats->dist = rd_stats->sse = (dist << 4);
+ // Though the decision here is to mark the block as skip based on luma stats,
+ // it is possible that the block becomes non-skip after chroma rd. In addition,
+ // intermediate non-skip costs calculated by the caller will be incorrect if
+ // the rate is set to zero (i.e., if zero_blk_rate is not accounted for).
+ // Hence an intermediate rate is populated for coding the luma tx blocks as
+ // skip; the caller sets the final rate according to the final rd decision
+ // (i.e., skip vs non-skip). The rate populated here corresponds to coding
+ // all the tx blocks in the current block with zero_blk_rate (based on the
+ // max tx size possible). E.g., for a 128x128 block the rate would be
+ // 4 * zero_blk_rate, where zero_blk_rate corresponds to coding one 64x64 tx
+ // block as 'all zeros'.
+ ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE];
+ ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE];
+ av1_get_entropy_contexts(bsize, &xd->plane[0], ctxa, ctxl);
+ ENTROPY_CONTEXT *ta = ctxa;
+ ENTROPY_CONTEXT *tl = ctxl;
+ const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
+ TXB_CTX txb_ctx;
+ get_txb_ctx(bsize, tx_size, 0, ta, tl, &txb_ctx);
+ const int zero_blk_rate = x->coeff_costs[txs_ctx][PLANE_TYPE_Y]
+ .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
+ rd_stats->rate = zero_blk_rate *
+ (block_size_wide[bsize] >> tx_size_wide_log2[tx_size]) *
+ (block_size_high[bsize] >> tx_size_high_log2[tx_size]);
}
// Search for best transform size and type for luma inter blocks.
@@ -5881,8 +5902,7 @@
const int64_t rd =
select_tx_size_and_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
found_rd_info ? matched_rd_info : NULL);
- assert(IMPLIES(this_rd_stats.skip && !this_rd_stats.invalid_rate,
- this_rd_stats.rate == 0));
+
if (rd < INT64_MAX) {
*rd_stats = this_rd_stats;
found = 1;
@@ -8769,6 +8789,9 @@
rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
rd_stats_y->rate = 0;
rd_stats_uv->rate = 0;
+ rd_stats->dist = rd_stats->sse;
+ rd_stats_y->dist = rd_stats_y->sse;
+ rd_stats_uv->dist = rd_stats_uv->sse;
rd_stats->rate += skip_flag_cost[1];
mbmi->skip = 1;
// here mbmi->skip temporarily plays a role as what this_skip2 does
@@ -8789,6 +8812,8 @@
rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
rd_stats->rate += skip_flag_cost[1];
rd_stats->dist = rd_stats->sse;
+ rd_stats_y->dist = rd_stats_y->sse;
+ rd_stats_uv->dist = rd_stats_uv->sse;
rd_stats_y->rate = 0;
rd_stats_uv->rate = 0;
mbmi->skip = 1;