Use mb_rd_hash in super_block_yrd
For the speed 3 and speed 4 presets, BD-rate improved by 0.03% and 0.02%
respectively (as per AWCY runs), with encode-time reductions of 1% and 0%
respectively (averaged across multiple test cases).
STATS_CHANGED
Change-Id: I056f9ced9df7db017dcf3a76be65d68f8e5b1b87
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index c7c724d..188a277 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3932,18 +3932,97 @@
(block_size_high[bsize] >> tx_size_high_log2[tx_size]);
}
+static INLINE uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
+ const int rows = block_size_high[bsize];
+ const int cols = block_size_wide[bsize];
+ const int16_t *diff = x->plane[0].src_diff;
+ const uint32_t hash = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator,
+ (uint8_t *)diff, 2 * rows * cols);
+ return (hash << 5) + bsize;
+}
+
+static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
+ const RD_STATS *const rd_stats,
+ MB_RD_RECORD *tx_rd_record) {
+ int index;
+ if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
+ index =
+ (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
+ ++tx_rd_record->num;
+ } else {
+ index = tx_rd_record->index_start;
+ tx_rd_record->index_start =
+ (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
+ }
+ MB_RD_INFO *const tx_rd_info = &tx_rd_record->tx_rd_info[index];
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
+ tx_rd_info->hash_value = hash;
+ tx_rd_info->tx_size = mbmi->tx_size;
+ memcpy(tx_rd_info->blk_skip, x->blk_skip,
+ sizeof(tx_rd_info->blk_skip[0]) * n4);
+ av1_copy(tx_rd_info->inter_tx_size, mbmi->inter_tx_size);
+ av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
+ tx_rd_info->rd_stats = *rd_stats;
+}
+
+static void fetch_tx_rd_info(int n4, const MB_RD_INFO *const tx_rd_info,
+ RD_STATS *const rd_stats, MACROBLOCK *const x) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = xd->mi[0];
+ mbmi->tx_size = tx_rd_info->tx_size;
+ memcpy(x->blk_skip, tx_rd_info->blk_skip,
+ sizeof(tx_rd_info->blk_skip[0]) * n4);
+ av1_copy(mbmi->inter_tx_size, tx_rd_info->inter_tx_size);
+ av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
+ *rd_stats = tx_rd_info->rd_stats;
+}
+
static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, BLOCK_SIZE bs,
int64_t ref_best_rd) {
MACROBLOCKD *xd = &x->e_mbd;
av1_init_rd_stats(rd_stats);
-
+ int is_inter = is_inter_block(xd->mi[0]);
assert(bs == xd->mi[0]->sb_type);
+ const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
+ const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
+
+ uint32_t hash = 0;
+ MB_RD_RECORD *mb_rd_record = NULL;
+ const int within_border = mi_row >= xd->tile.mi_row_start &&
+ (mi_row + mi_size_high[bs] < xd->tile.mi_row_end) &&
+ mi_col >= xd->tile.mi_col_start &&
+ (mi_col + mi_size_wide[bs] < xd->tile.mi_col_end);
+ const int is_mb_rd_hash_enabled =
+ (within_border && cpi->sf.use_mb_rd_hash && is_inter);
+ const int n4 = bsize_to_num_blk(bs);
+ if (is_mb_rd_hash_enabled) {
+ hash = get_block_residue_hash(x, bs);
+ mb_rd_record = &x->mb_rd_record;
+ if (ref_best_rd != INT64_MAX) {
+ for (int i = 0; i < mb_rd_record->num; ++i) {
+ const int index =
+ (mb_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
+ // If there is a match in the tx_rd_record, fetch the RD decision and
+ // terminate early.
+ if (mb_rd_record->tx_rd_info[index].hash_value == hash) {
+ MB_RD_INFO *tx_rd_info = &mb_rd_record->tx_rd_info[index];
+ fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
+ // Reset the pruning flags.
+ av1_zero(x->tx_search_prune);
+ x->tx_split_prune_flag = 0;
+ return;
+ }
+ }
+ }
+ }
+
// If we predict that skip is the optimal RD decision - set the respective
// context and terminate early.
int64_t dist;
- int is_inter = is_inter_block(xd->mi[0]);
+
if (cpi->sf.tx_type_search.use_skip_flag_prediction && is_inter &&
(!xd->lossless[xd->mi[0]->segment_id]) &&
predict_skip_flag(x, bs, &dist, cpi->common.reduced_tx_set_used)) {
@@ -3955,6 +4034,9 @@
calc_regional_sse(x, bs, dist, rd_stats);
}
#endif
+ // Save the RD search results into tx_rd_record.
+ if (is_mb_rd_hash_enabled)
+ save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record);
// Reset the pruning flags.
av1_zero(x->tx_search_prune);
x->tx_split_prune_flag = 0;
@@ -3968,6 +4050,12 @@
} else {
choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
}
+
+ // Save the RD search results into tx_rd_record.
+ if (is_mb_rd_hash_enabled) {
+ assert(mb_rd_record != NULL);
+ save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record);
+ }
}
// Return the rate cost for luma prediction mode info. of intra blocks.
@@ -5587,52 +5675,6 @@
return is_cost_valid;
}
-static INLINE uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
- const int rows = block_size_high[bsize];
- const int cols = block_size_wide[bsize];
- const int16_t *diff = x->plane[0].src_diff;
- const uint32_t hash = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator,
- (uint8_t *)diff, 2 * rows * cols);
- return (hash << 5) + bsize;
-}
-
-static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
- const RD_STATS *const rd_stats,
- MB_RD_RECORD *tx_rd_record) {
- int index;
- if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
- index =
- (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
- ++tx_rd_record->num;
- } else {
- index = tx_rd_record->index_start;
- tx_rd_record->index_start =
- (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
- }
- MB_RD_INFO *const tx_rd_info = &tx_rd_record->tx_rd_info[index];
- const MACROBLOCKD *const xd = &x->e_mbd;
- const MB_MODE_INFO *const mbmi = xd->mi[0];
- tx_rd_info->hash_value = hash;
- tx_rd_info->tx_size = mbmi->tx_size;
- memcpy(tx_rd_info->blk_skip, x->blk_skip,
- sizeof(tx_rd_info->blk_skip[0]) * n4);
- av1_copy(tx_rd_info->inter_tx_size, mbmi->inter_tx_size);
- av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
- tx_rd_info->rd_stats = *rd_stats;
-}
-
-static void fetch_tx_rd_info(int n4, const MB_RD_INFO *const tx_rd_info,
- RD_STATS *const rd_stats, MACROBLOCK *const x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
- mbmi->tx_size = tx_rd_info->tx_size;
- memcpy(x->blk_skip, tx_rd_info->blk_skip,
- sizeof(tx_rd_info->blk_skip[0]) * n4);
- av1_copy(mbmi->inter_tx_size, tx_rd_info->inter_tx_size);
- av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
- *rd_stats = tx_rd_info->rd_stats;
-}
-
static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record,
const uint32_t hash) {
// Linear search through the circular buffer to find matching hash.