Refactor var-tx pipeline to support cb4x4 mode
Replace the hard-coded 4x4 transform block step size assumption with
scalable table access.
Change-Id: Ib1cc555c2641e5634acdd91ca33217f00aeb0b89
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 32b9798..eb9a951 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3150,8 +3150,11 @@
int txb_w = tx_size_wide_unit[tx_size];
int src_stride = p->src.stride;
- uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
- uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
+ uint8_t *src =
+ &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
+ uint8_t *dst =
+ &pd->dst
+ .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
#if CONFIG_AOM_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
uint8_t *rec_buffer;
@@ -3161,7 +3164,8 @@
int max_blocks_high = block_size_high[plane_bsize];
int max_blocks_wide = block_size_wide[plane_bsize];
const int diff_stride = max_blocks_wide;
- const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
+ const int16_t *diff =
+ &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
int txb_coeff_cost;
assert(tx_size < TX_SIZES_ALL);
@@ -3206,10 +3210,11 @@
int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
tmp = 0;
- for (idy = 0; idy < blocks_height; idy += 2) {
- for (idx = 0; idx < blocks_width; idx += 2) {
- const int16_t *d = diff + 4 * idy * diff_stride + 4 * idx;
- tmp += aom_sum_squares_2d_i16(d, diff_stride, 8);
+ for (idy = 0; idy < blocks_height; ++idy) {
+ for (idx = 0; idx < blocks_width; ++idx) {
+ const int16_t *d =
+ diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]);
+ tmp += aom_sum_squares_2d_i16(d, diff_stride, 4);
}
}
} else {
@@ -3247,11 +3252,13 @@
int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
tmp = 0;
- for (idy = 0; idy < blocks_height; idy += 2) {
- for (idx = 0; idx < blocks_width; idx += 2) {
- uint8_t *const s = src + 4 * idy * src_stride + 4 * idx;
- uint8_t *const r = rec_buffer + 4 * idy * MAX_TX_SIZE + 4 * idx;
- cpi->fn_ptr[BLOCK_8X8].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
+ for (idy = 0; idy < blocks_height; ++idy) {
+ for (idx = 0; idx < blocks_width; ++idx) {
+ uint8_t *const s =
+ src + ((idy * src_stride + idx) << tx_size_wide_log2[0]);
+ uint8_t *const r =
+ rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]);
+ cpi->fn_ptr[BLOCK_4X4].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
tmp += this_dist;
}
}
@@ -3428,8 +3435,8 @@
if (is_cost_valid) {
const struct macroblockd_plane *const pd = &xd->plane[0];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
- const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
- const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
+ const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
@@ -3445,7 +3452,7 @@
RD_STATS pn_rd_stats;
av1_init_rd_stats(&pn_rd_stats);
- av1_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
+ av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
memcpy(tx_above, xd->above_txfm_context,
sizeof(TXFM_CONTEXT) * (mi_width >> 1));
memcpy(tx_left, xd->left_txfm_context,
@@ -3552,8 +3559,8 @@
TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
TX_SIZE best_tx = max_txsize_lookup[bsize];
TX_SIZE best_min_tx_size = TX_SIZES_ALL;
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
- const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
+ uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
+ const int n4 = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
int idx, idy;
int prune = 0;
const int count32 = 1 << (2 * (cpi->common.mib_size_log2 -
@@ -3716,8 +3723,8 @@
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
- const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
- const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+ const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
+ const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
@@ -3729,7 +3736,7 @@
RD_STATS pn_rd_stats;
av1_init_rd_stats(&pn_rd_stats);
- av1_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
+ av1_get_entropy_contexts(bsize, 0, pd, ta, tl);
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bw) {