tx block rd hash: fix tx block order
Previously the tx block order, i.e. the child index, was wrong for
rectangular partition blocks.
Re-enables the tx rd hash for rectangular blocks; speeds up the encoder by
7% with no compression quality loss.
BUG=aomedia:1106
Change-Id: I615b8c56744075088943d372e607c18795cedac4
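To illustrate the ordering bug, below is a minimal standalone sketch (not libaom code); the 8x16 parent that splits vertically into two 8x8 children is an assumed example. The old mapping 2 * y_odd + x_odd presumes a 2x2 split, so the second child of a two-way split lands in slot 2, whereas the raster-scan counter that child_idx_buf now stores yields the expected indices 0 and 1.

/* Standalone sketch of the child-index ordering bug (assumed example:
 * an 8x16 parent transform splitting vertically into two 8x8 children,
 * so only child slots 0 and 1 are valid). */
#include <stdio.h>

int main(void) {
  const int cur_tx_bw = 8, cur_tx_bh = 8;    /* child tx dimensions */
  const int child_rows = 2, child_cols = 1;  /* 8x16 parent -> 2x1 children */
  int raster_idx = 0;                        /* what child_idx_buf now stores */
  for (int r = 0; r < child_rows; ++r) {
    for (int c = 0; c < child_cols; ++c) {
      const int row = r * cur_tx_bh, col = c * cur_tx_bw;
      const int y_odd = (row / cur_tx_bh) % 2;
      const int x_odd = (col / cur_tx_bw) % 2;
      /* Old mapping: assumes a 2x2 split, i.e. index = 2 * y_odd + x_odd. */
      const int old_idx = y_odd ? (x_odd ? 3 : 2) : (x_odd ? 1 : 0);
      printf("child at (row %2d, col %2d): old idx %d, raster idx %d\n",
             row, col, old_idx, raster_idx);
      ++raster_idx;
    }
  }
  /* Prints old idx 0 and 2 for the two children: the second child is filed
   * under children[2] even though a two-way split only has slots 0 and 1. */
  return 0;
}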
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index a713bcf..65cb553 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -4570,7 +4570,7 @@
const int bh = block_size_high[bsize];
// Hashing is performed only for square TX sizes larger than TX_4X4
- if (max_square_tx_size < TX_8X8 || bw != bh) return 0;
+ if (max_square_tx_size < TX_8X8) return 0;
const int bw_mi = mi_size_wide[bsize];
const int diff_stride = bw;
@@ -4584,13 +4584,14 @@
int cur_rd_info_idx = 0;
int cur_tx_depth = 0;
uint8_t parent_idx_buf[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
+ uint8_t child_idx_buf[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
- int cur_tx_size = max_txsize_rect_lookup[1][bsize];
+ TX_SIZE cur_tx_size = max_txsize_rect_lookup[1][bsize];
while (cur_tx_depth <= MAX_VARTX_DEPTH) {
const int cur_tx_bw = tx_size_wide[cur_tx_size];
const int cur_tx_bh = tx_size_high[cur_tx_size];
if (cur_tx_bw < 8 || cur_tx_bh < 8) break;
-
+ const TX_SIZE next_tx_size = sub_tx_size_map[1][cur_tx_size];
for (int row = 0; row < bh; row += cur_tx_bh) {
for (int col = 0; col < bw; col += cur_tx_bw) {
if (cur_tx_bw != cur_tx_bh) {
@@ -4626,26 +4627,38 @@
// Update the output quadtree RD info structure.
av1_zero(dst_rd_info[cur_rd_info_idx].children);
- const int block_mi_row = row / MI_SIZE;
- const int block_mi_col = col / MI_SIZE;
- if (cur_tx_depth > 0) {
- const int y_odd = (row / cur_tx_bh) % 2;
- const int x_odd = (col / cur_tx_bw) % 2;
- const int child_idx = y_odd ? (x_odd ? 3 : 2) : (x_odd ? 1 : 0);
- const int mi_index = block_mi_row * bw_mi + block_mi_col;
+ const int this_mi_row = row / MI_SIZE;
+ const int this_mi_col = col / MI_SIZE;
+ if (cur_tx_depth > 0) { // Set up child pointers.
+ const int mi_index = this_mi_row * bw_mi + this_mi_col;
+ const int child_idx = child_idx_buf[mi_index];
+ assert(child_idx < 4);
dst_rd_info[parent_idx_buf[mi_index]].children[child_idx] =
&dst_rd_info[cur_rd_info_idx];
}
- const int tx_bh_mi = cur_tx_bh / MI_SIZE;
- const int tx_bw_mi = cur_tx_bw / MI_SIZE;
- for (int i = block_mi_row; i < block_mi_row + tx_bh_mi; ++i) {
- memset(parent_idx_buf + i * bw_mi + block_mi_col, cur_rd_info_idx,
- tx_bw_mi);
+ if (cur_tx_depth < MAX_VARTX_DEPTH) { // Set up parent and child idx.
+ const int tx_bh_mi = cur_tx_bh / MI_SIZE;
+ const int tx_bw_mi = cur_tx_bw / MI_SIZE;
+ for (int i = this_mi_row; i < this_mi_row + tx_bh_mi; ++i) {
+ memset(parent_idx_buf + i * bw_mi + this_mi_col, cur_rd_info_idx,
+ tx_bw_mi);
+ }
+ int child_idx = 0;
+ const int next_tx_bh_mi = tx_size_high_unit[next_tx_size];
+ const int next_tx_bw_mi = tx_size_wide_unit[next_tx_size];
+ for (int i = this_mi_row; i < this_mi_row + tx_bh_mi;
+ i += next_tx_bh_mi) {
+ for (int j = this_mi_col; j < this_mi_col + tx_bw_mi;
+ j += next_tx_bw_mi) {
+ assert(child_idx < 4);
+ child_idx_buf[i * bw_mi + j] = child_idx++;
+ }
+ }
}
++cur_rd_info_idx;
}
}
- cur_tx_size = sub_tx_size_map[1][cur_tx_size];
+ cur_tx_size = next_tx_size;
++cur_tx_depth;
}
return 1;