Rework hash map for txk-sel
txk-sel allows each transform block to select its own transform
kernel. This locality makes it possible to store the selected RD
cost, including the tx_type selection, per transform block size.
This reduces the required hash map size to 1/16 of what is needed
without txk-sel.
This commit reworks the hash map RD cost fetch for txk-sel. Tested
on red_kayak_480p at speed 1, enabling txk-sel makes encoding 12%
faster than the baseline with txk-sel off. Additionally enabling
the reduced hash map size makes speed 1 encoding another 10% faster.
Change-Id: I4a5d99d27e2a76b10e76c00a8178f692c95fdf13
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 3751c19..604e781 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -132,9 +132,13 @@
typedef struct {
int64_t dist;
+ int64_t sse;
int rate;
uint16_t eob;
#if CONFIG_LV_MAP
+#if CONFIG_TXK_SEL
+ TX_TYPE tx_type;
+#endif
uint16_t entropy_context;
uint8_t txb_entropy_ctx;
#else
@@ -147,7 +151,11 @@
#define TX_SIZE_RD_RECORD_BUFFER_LEN 256
typedef struct {
uint32_t hash_vals[TX_SIZE_RD_RECORD_BUFFER_LEN];
+#if CONFIG_TXK_SEL
+ TX_SIZE_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN];
+#else
TX_SIZE_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN][TX_TYPES];
+#endif
int index_start;
int num;
} TX_SIZE_RD_RECORD;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 6679d50..7f3d81d 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3629,19 +3629,58 @@
int plane_bsize, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l, RD_STATS *rd_stats, int fast,
TX_SIZE_RD_INFO *rd_info_array) {
+ const struct macroblock_plane *const p = &x->plane[plane];
#if CONFIG_TXK_SEL
- (void)fast;
- (void)rd_info_array;
+ TXB_CTX txb_ctx;
+ get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
+ const uint16_t cur_joint_ctx =
+ (txb_ctx.dc_sign_ctx << 8) + txb_ctx.txb_skip_ctx;
+
+ // Look up the RD stats and terminate early if we have already processed
+ // exactly the same residual with exactly the same entropy context.
+ if (rd_info_array != NULL && rd_info_array->valid &&
+ rd_info_array->entropy_context == cur_joint_ctx &&
+ rd_info_array->fast == fast) {
+ rd_stats->rate += rd_info_array->rate;
+ rd_stats->dist += rd_info_array->dist;
+ rd_stats->sse += rd_info_array->sse;
+ rd_stats->skip &= rd_info_array->eob == 0;
+ p->eobs[block] = rd_info_array->eob;
+ p->txb_entropy_ctx[block] = rd_info_array->txb_entropy_ctx;
+ if (plane == 0) {
+ x->e_mbd.mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] =
+ rd_info_array->tx_type;
+ }
+ return;
+ }
RD_STATS this_rd_stats;
search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
a, l, 0, &this_rd_stats);
+
av1_merge_rd_stats(rd_stats, &this_rd_stats);
+
+ // Save RD results for possible reuse in future.
+ if (rd_info_array != NULL) {
+ rd_info_array->valid = 1;
+ rd_info_array->entropy_context = cur_joint_ctx;
+ rd_info_array->fast = fast;
+ rd_info_array->rate = this_rd_stats.rate;
+ rd_info_array->dist = this_rd_stats.dist;
+ rd_info_array->sse = this_rd_stats.sse;
+ rd_info_array->eob = p->eobs[block];
+ rd_info_array->txb_entropy_ctx = p->txb_entropy_ctx[block];
+ if (plane == 0) {
+ rd_info_array->tx_type =
+ x->e_mbd.mi[0]
+ ->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col];
+ }
+ }
+
return;
#else
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
- const struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
// This function is used only for inter
@@ -4611,11 +4650,18 @@
const int rd_record_idx =
row_in_sb * (MAX_MIB_SIZE >> (cur_tx_size + 1 - TX_8X8)) +
col_in_sb;
+
int idx = find_tx_size_rd_info(
&rd_records_table[cur_tx_size - TX_8X8][rd_record_idx], hash);
+#if CONFIG_TXK_SEL
+ dst_rd_info[cur_rd_info_idx].rd_info_array =
+ &rd_records_table[cur_tx_size - TX_8X8][rd_record_idx]
+ .tx_rd_info[idx];
+#else
dst_rd_info[cur_rd_info_idx].rd_info_array =
rd_records_table[cur_tx_size - TX_8X8][rd_record_idx]
.tx_rd_info[idx];
+#endif
}
// Update the output quadtree RD info structure.