Reduce memory usage of txk_type[]

Reduce the array size from 1024 to 64 entries. Accesses now go through
the new av1_get_txk_type_index() helper instead of indexing with
(blk_row << MAX_MIB_SIZE_LOG2) + blk_col.

On a CIF video, encoder memory usage drops from 368 MB to 311 MB;
decoder memory usage drops from 17.6 MB to 9.1 MB.

Tested 30 frames on lowres; compression stats remain identical.

Change-Id: I3b95bc8d3e57e1074751d81531bb9f79b195506e
diff --git a/av1/common/blockd.h b/av1/common/blockd.h index 6eeb716..e94f025 100644 --- a/av1/common/blockd.h +++ b/av1/common/blockd.h
@@ -225,6 +225,9 @@ } INTERINTER_COMPOUND_DATA; #define INTER_TX_SIZE_BUF_LEN 16 +#if CONFIG_TXK_SEL +#define TXK_TYPE_BUF_LEN 64 +#endif // This structure now relates to 4x4 block regions. typedef struct MB_MODE_INFO { // Common for both INTER and INTRA blocks @@ -253,7 +256,7 @@ MV_REFERENCE_FRAME ref_frame[2]; TX_TYPE tx_type; #if CONFIG_TXK_SEL - TX_TYPE txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; + TX_TYPE txk_type[TXK_TYPE_BUF_LEN]; #endif #if CONFIG_FILTER_INTRA @@ -883,6 +886,22 @@ return index; } +#if CONFIG_TXK_SEL +static INLINE int av1_get_txk_type_index(BLOCK_SIZE bsize, int blk_row, + int blk_col) { + TX_SIZE txs = max_txsize_rect_lookup[bsize]; + for (int level = 0; level < MAX_VARTX_DEPTH; ++level) + txs = sub_tx_size_map[1][txs]; + const int tx_w = tx_size_wide_unit[txs]; + const int tx_h = tx_size_high_unit[txs]; + const int bw_uint = mi_size_wide[bsize]; + const int stride = bw_uint / tx_w; + const int index = (blk_row / tx_h) * stride + (blk_col / tx_w); + assert(index < TXK_TYPE_BUF_LEN); + return index; +} +#endif // CONFIG_TXK_SEL + static INLINE TX_TYPE av1_get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd, int blk_row, int blk_col, TX_SIZE tx_size, @@ -900,12 +919,16 @@ tx_type = DCT_DCT; } else { if (plane_type == PLANE_TYPE_Y) { - tx_type = mbmi->txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col]; + const int txk_type_idx = + av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col); + tx_type = mbmi->txk_type[txk_type_idx]; } else if (is_inter_block(mbmi)) { // scale back to y plane's coordinate blk_row <<= pd->subsampling_y; blk_col <<= pd->subsampling_x; - tx_type = mbmi->txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col]; + const int txk_type_idx = + av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col); + tx_type = mbmi->txk_type[txk_type_idx]; } else { // In intra mode, uv planes don't share the same prediction mode as y // plane, so the tx_type should not be shared
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c index 517cc58..5cbd95b 100644 --- a/av1/decoder/decodemv.c +++ b/av1/decoder/decodemv.c
@@ -810,7 +810,9 @@ #else // only y plane's tx_type is transmitted if (plane > 0) return; - TX_TYPE *tx_type = &mbmi->txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col]; + const int txk_type_idx = + av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col); + TX_TYPE *tx_type = &mbmi->txk_type[txk_type_idx]; #endif const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c index 7d57146..3c68bf0 100644 --- a/av1/decoder/decodetxb.c +++ b/av1/decoder/decodetxb.c
@@ -144,8 +144,11 @@ if (all_zero) { *max_scan_line = 0; #if CONFIG_TXK_SEL - if (plane == 0) - mbmi->txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = DCT_DCT; + if (plane == 0) { + const int txk_type_idx = + av1_get_txk_type_index(mbmi->sb_type, blk_row, blk_col); + mbmi->txk_type[txk_type_idx] = DCT_DCT; + } #endif return 0; }
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index 7579213..49509b7 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -108,7 +108,7 @@ TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN]; uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8]; #if CONFIG_TXK_SEL - TX_TYPE txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; + TX_TYPE txk_type[TXK_TYPE_BUF_LEN]; #endif // CONFIG_TXK_SEL RD_STATS rd_stats; uint32_t hash_value;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index 2c7b87f..3a3b7a2 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -476,8 +476,7 @@ reset_tx_size(xd, mbmi, cm->tx_mode); #if CONFIG_TXK_SEL memset(mbmi->txk_type, DCT_DCT, - sizeof(mbmi->txk_type[0]) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN); #endif } // Else for cyclic refresh mode update the segment map, set the segment id @@ -488,8 +487,7 @@ reset_tx_size(xd, mbmi, cm->tx_mode); #if CONFIG_TXK_SEL memset(mbmi->txk_type, DCT_DCT, - sizeof(mbmi->txk_type[0]) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN); #endif } }
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index e0fac35..e66b734 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c
@@ -254,20 +254,17 @@ } #if CONFIG_TXK_SEL - if (args->cpi->oxcf.aq_mode != NO_AQ && p->eobs[block] == 0 && plane == 0) - xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = - DCT_DCT; - + const int txk_type_idx = + av1_get_txk_type_index(plane_bsize, blk_row, blk_col); + if (args->cpi->oxcf.aq_mode != NO_AQ && p->eobs[block] == 0 && plane == 0) { + xd->mi[0]->mbmi.txk_type[txk_type_idx] = DCT_DCT; + } uint8_t disable_txk_check = args->enable_optimize_b; - if (plane == 0 && p->eobs[block] == 0) { if (disable_txk_check) { - xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = - DCT_DCT; + xd->mi[0]->mbmi.txk_type[txk_type_idx] = DCT_DCT; } else { - assert( - xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] == - DCT_DCT); + assert(xd->mi[0]->mbmi.txk_type[txk_type_idx] == DCT_DCT); } } #endif // CONFIG_TXK_SEL @@ -537,9 +534,8 @@ *eob = 0; p->txb_entropy_ctx[block] = 0; *(args->skip) = 0; - assert(xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] == - DCT_DCT); - + assert(xd->mi[0]->mbmi.txk_type[av1_get_txk_type_index( + plane_bsize, blk_row, blk_col)] == DCT_DCT); #if CONFIG_CFL if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(&xd->mi[0]->mbmi)) { @@ -562,9 +558,8 @@ #if CONFIG_TXK_SEL if (plane == 0 && p->eobs[block] == 0) { - assert( - xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] == - DCT_DCT); + assert(xd->mi[0]->mbmi.txk_type[av1_get_txk_type_index( + plane_bsize, blk_row, blk_col)] == DCT_DCT); } #endif // CONFIG_TXK_SEL } else { @@ -574,9 +569,11 @@ } #if CONFIG_TXK_SEL - if (args->cpi->oxcf.aq_mode != NO_AQ && !*eob && plane == 0) - xd->mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = - DCT_DCT; + if (args->cpi->oxcf.aq_mode != NO_AQ && !*eob && plane == 0) { + const int txk_type_idx = + av1_get_txk_type_index(plane_bsize, blk_row, blk_col); + xd->mi[0]->mbmi.txk_type[txk_type_idx] = DCT_DCT; + } #endif av1_inverse_transform_block(xd, 
dqcoeff, plane, tx_type, tx_size, dst,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 5b9c392..4ae70ee 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -1849,9 +1849,10 @@ } #if CONFIG_TXK_SEL -static void update_txk_array(TX_TYPE *txk_type, int blk_row, int blk_col, - TX_SIZE tx_size, TX_TYPE tx_type) { - txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = tx_type; +static void update_txk_array(TX_TYPE *txk_type, BLOCK_SIZE bsize, int blk_row, + int blk_col, TX_SIZE tx_size, TX_TYPE tx_type) { + const int txk_type_idx = av1_get_txk_type_index(bsize, blk_row, blk_col); + txk_type[txk_type_idx] = tx_type; const int txw = tx_size_wide_unit[tx_size]; const int txh = tx_size_high_unit[tx_size]; @@ -1862,10 +1863,13 @@ if (txw == tx_size_wide_unit[TX_64X64] || txh == tx_size_high_unit[TX_64X64]) { const int tx_unit = tx_size_wide_unit[TX_16X16]; - for (int idy = 0; idy < txh; idy += tx_unit) - for (int idx = 0; idx < txw; idx += tx_unit) - txk_type[((blk_row + idy) << MAX_MIB_SIZE_LOG2) + (blk_col + idx)] = - tx_type; + for (int idy = 0; idy < txh; idy += tx_unit) { + for (int idx = 0; idx < txw; idx += tx_unit) { + const int this_index = + av1_get_txk_type_index(bsize, blk_row + idy, blk_col + idx); + txk_type[this_index] = tx_type; + } + } } } @@ -1919,7 +1923,9 @@ if (is_inter && x->use_default_inter_tx_type && tx_type != get_default_tx_type(0, xd, tx_size)) continue; - mbmi->txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = tx_type; + const int txk_type_idx = + av1_get_txk_type_index(plane_bsize, blk_row, blk_col); + mbmi->txk_type[txk_type_idx] = tx_type; } const TX_TYPE ref_tx_type = av1_get_tx_type(get_plane_type(plane), xd, blk_row, blk_col, tx_size, @@ -1978,8 +1984,10 @@ if (best_eob == 0) best_tx_type = DCT_DCT; - if (plane == 0) - update_txk_array(mbmi->txk_type, blk_row, blk_col, tx_size, best_tx_type); + if (plane == 0) { + update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, + best_tx_type); + } x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx; x->plane[plane].eobs[block] = best_eob; @@ -2592,7 +2600,7 @@ TX_SIZE best_tx_size = max_rect_tx_size; TX_TYPE best_tx_type = 
DCT_DCT; #if CONFIG_TXK_SEL - TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; + TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; #endif // CONFIG_TXK_SEL uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; const int n4 = bsize_to_num_blk(bs); @@ -2644,8 +2652,7 @@ if (rd < best_rd) { #if CONFIG_TXK_SEL memcpy(best_txk_type, mbmi->txk_type, - sizeof(best_txk_type[0]) * MAX_SB_SQUARE / - (TX_SIZE_W_MIN * TX_SIZE_H_MIN)); + sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); #endif memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4); best_tx_type = tx_type; @@ -2671,8 +2678,7 @@ mbmi->tx_type = best_tx_type; #if CONFIG_TXK_SEL memcpy(mbmi->txk_type, best_txk_type, - sizeof(best_txk_type[0]) * MAX_SB_SQUARE / - (TX_SIZE_W_MIN * TX_SIZE_H_MIN)); + sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); #endif memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4); @@ -3147,7 +3153,7 @@ FILTER_INTRA_MODE_INFO filter_intra_mode_info; TX_TYPE best_tx_type; #if CONFIG_TXK_SEL - TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; + TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; #endif (void)ctx; av1_zero(filter_intra_mode_info); @@ -3178,8 +3184,7 @@ best_tx_type = mbmi->tx_type; #if CONFIG_TXK_SEL memcpy(best_txk_type, mbmi->txk_type, - sizeof(*best_txk_type) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); #endif memcpy(ctx->blk_skip[0], x->blk_skip[0], sizeof(uint8_t) * ctx->num_4x4_blk); @@ -3198,8 +3203,7 @@ mbmi->tx_type = best_tx_type; #if CONFIG_TXK_SEL memcpy(mbmi->txk_type, best_txk_type, - sizeof(*best_txk_type) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); #endif return 1; } else { @@ -3250,8 +3254,7 @@ if (this_rd < *best_rd) { #if CONFIG_TXK_SEL memcpy(best_txk_type, mbmi->txk_type, - sizeof(*best_txk_type) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(*best_txk_type) * 
TXK_TYPE_BUF_LEN); #endif memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4); *best_rd = this_rd; @@ -3284,7 +3287,7 @@ TX_TYPE best_tx_type = mbmi->tx_type; #if CONFIG_TXK_SEL const int n4 = bsize_to_num_blk(bsize); - TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; + TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; #else TX_TYPE *best_txk_type = NULL; #endif @@ -3335,8 +3338,7 @@ mbmi->tx_type = best_tx_type; #if CONFIG_TXK_SEL memcpy(mbmi->txk_type, best_txk_type, - sizeof(*best_txk_type) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4); #endif return best_rd; @@ -3714,6 +3716,8 @@ const uint16_t cur_joint_ctx = (txb_ctx.dc_sign_ctx << 8) + txb_ctx.txb_skip_ctx; + const int txk_type_idx = + av1_get_txk_type_index(plane_bsize, blk_row, blk_col); // Look up RD and terminate early in case when we've already processed exactly // the same residual with exactly the same entropy context. 
if (rd_info_array != NULL && rd_info_array->valid && @@ -3725,8 +3729,7 @@ p->eobs[block] = rd_info_array->eob; p->txb_entropy_ctx[block] = rd_info_array->txb_entropy_ctx; if (plane == 0) { - x->e_mbd.mi[0]->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = - rd_info_array->tx_type; + x->e_mbd.mi[0]->mbmi.txk_type[txk_type_idx] = rd_info_array->tx_type; } return; } @@ -3747,9 +3750,7 @@ rd_info_array->eob = p->eobs[block]; rd_info_array->txb_entropy_ctx = p->txb_entropy_ctx[block]; if (plane == 0) { - rd_info_array->tx_type = - x->e_mbd.mi[0] - ->mbmi.txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col]; + rd_info_array->tx_type = x->e_mbd.mi[0]->mbmi.txk_type[txk_type_idx]; } } @@ -4024,7 +4025,8 @@ x->blk_skip[plane][blk_row * bw + blk_col] = 1; p->eobs[block] = 0; #if CONFIG_TXK_SEL - update_txk_array(mbmi->txk_type, blk_row, blk_col, tx_size, DCT_DCT); + update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, + DCT_DCT); #endif } else { x->blk_skip[plane][blk_row * bw + blk_col] = 0; @@ -4037,8 +4039,9 @@ tmp_eob = p->txb_entropy_ctx[block]; #if CONFIG_TXK_SEL - const int txk_idx = (blk_row << MAX_MIB_SIZE_LOG2) + blk_col; - best_tx_type = mbmi->txk_type[txk_idx]; + const int txk_type_idx = + av1_get_txk_type_index(plane_bsize, blk_row, blk_col); + best_tx_type = mbmi->txk_type[txk_type_idx]; #endif } @@ -4215,7 +4218,8 @@ mbmi->tx_size = tx_size_selected; #if CONFIG_TXK_SEL - update_txk_array(mbmi->txk_type, blk_row, blk_col, tx_size, best_tx_type); + update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, + best_tx_type); #endif if (this_rd == INT64_MAX) *is_cost_valid = 0; x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip; @@ -4425,7 +4429,8 @@ x->plane[plane].eobs[block] = 0; x->plane[plane].txb_entropy_ctx[block] = 0; #if CONFIG_TXK_SEL - update_txk_array(mbmi->txk_type, blk_row, blk_col, tx_size, DCT_DCT); + update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, + DCT_DCT); #endif } else 
{ rd_stats->skip = 0; @@ -4850,9 +4855,7 @@ const TX_SIZE tx_size = get_max_rect_tx_size(bsize, is_inter_block(mbmi)); mbmi->tx_type = DCT_DCT; #if CONFIG_TXK_SEL - memset(mbmi->txk_type, DCT_DCT, - sizeof(mbmi->txk_type[0]) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN); #endif memset(mbmi->inter_tx_size, tx_size, sizeof(mbmi->inter_tx_size)); mbmi->tx_size = tx_size; @@ -9798,10 +9801,9 @@ TX_SIZE best_tx_size = mbmi->tx_size; TX_TYPE best_tx_type = mbmi->tx_type; #if CONFIG_TXK_SEL - TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; + TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; memcpy(best_txk_type, mbmi->txk_type, - sizeof(*best_txk_type) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); #endif FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED; int64_t best_rd_tmp = INT64_MAX; @@ -9832,8 +9834,7 @@ best_tx_type = mbmi->tx_type; #if CONFIG_TXK_SEL memcpy(best_txk_type, mbmi->txk_type, - sizeof(*best_txk_type) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); #endif memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * ctx->num_4x4_blk); @@ -9851,8 +9852,7 @@ mbmi->tx_type = best_tx_type; #if CONFIG_TXK_SEL memcpy(mbmi->txk_type, best_txk_type, - sizeof(*best_txk_type) * - (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN))); + sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); #endif memcpy(x->blk_skip[0], best_blk_skip, sizeof(x->blk_skip[0][0]) * ctx->num_4x4_blk);