Expand select_inter_block_yrd() into its caller. Reduces call-stack depth and some intermediate variable calculations. BUG=aomedia:2279 Change-Id: I7ba2941efad07b689ecb69c67a5d6fb4f7b6e400
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 63e555a..be7fd6a 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -5016,17 +5016,28 @@ } } -static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t ref_best_rd, - FAST_TX_SEARCH_MODE ftxs_mode, - TXB_RD_INFO_NODE *rd_info_tree) { - if (ref_best_rd < 0) { - av1_invalid_rd_stats(rd_stats); - return; - } - +static int64_t select_tx_size_and_type(const AV1_COMP *cpi, MACROBLOCK *x, + RD_STATS *rd_stats, BLOCK_SIZE bsize, + int64_t ref_best_rd, + TXB_RD_INFO_NODE *rd_info_tree) { MACROBLOCKD *const xd = &x->e_mbd; + assert(is_inter_block(xd->mi[0])); + + // TODO(debargha): enable this as a speed feature where the + // select_inter_block_yrd() function above will use a simplified search + // such as not using full optimize, but the inter_block_yrd() function + // will use more complex search given that the transform partitions have + // already been decided. + + const int fast_tx_search = cpi->sf.tx_size_search_method > USE_FULL_RD; + int64_t rd_thresh = ref_best_rd; + if (fast_tx_search && rd_thresh < INT64_MAX) { + if (INT64_MAX - rd_thresh > (rd_thresh >> 3)) rd_thresh += (rd_thresh >> 3); + } + assert(rd_thresh > 0); + + const FAST_TX_SEARCH_MODE ftxs_mode = + fast_tx_search ? FTXS_DCT_AND_1D_DCT_ONLY : FTXS_NONE; const struct macroblockd_plane *const pd = &xd->plane[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); @@ -5057,9 +5068,8 @@ for (int idy = 0; idy < mi_height; idy += bh) { for (int idx = 0; idx < mi_width; idx += bw) { const int64_t best_rd_sofar = - (ref_best_rd == INT64_MAX) - ? INT64_MAX - : (ref_best_rd - (AOMMIN(skip_rd, this_rd))); + (rd_thresh == INT64_MAX) ? 
INT64_MAX + : (rd_thresh - (AOMMIN(skip_rd, this_rd))); int is_cost_valid = 1; RD_STATS pn_rd_stats; select_tx_block(cpi, x, idy, idx, block, max_tx_size, init_depth, @@ -5068,7 +5078,7 @@ rd_info_tree); if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) { av1_invalid_rd_stats(rd_stats); - return; + return INT64_MAX; } av1_merge_rd_stats(rd_stats, &pn_rd_stats); skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse); @@ -5088,32 +5098,7 @@ } else { rd_stats->skip = 0; } -} -static int64_t select_tx_size_and_type(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t ref_best_rd, - TXB_RD_INFO_NODE *rd_info_tree) { - MACROBLOCKD *const xd = &x->e_mbd; - assert(is_inter_block(xd->mi[0])); - - // TODO(debargha): enable this as a speed feature where the - // select_inter_block_yrd() function above will use a simplified search - // such as not using full optimize, but the inter_block_yrd() function - // will use more complex search given that the transform partitions have - // already been decided. - - const int fast_tx_search = cpi->sf.tx_size_search_method > USE_FULL_RD; - int64_t rd_thresh = ref_best_rd; - if (fast_tx_search && rd_thresh < INT64_MAX) { - if (INT64_MAX - rd_thresh > (rd_thresh >> 3)) rd_thresh += (rd_thresh >> 3); - } - assert(rd_thresh > 0); - - FAST_TX_SEARCH_MODE ftxs_mode = - fast_tx_search ? FTXS_DCT_AND_1D_DCT_ONLY : FTXS_NONE; - select_inter_block_yrd(cpi, x, rd_stats, bsize, rd_thresh, ftxs_mode, - rd_info_tree); if (rd_stats->rate == INT_MAX) return INT64_MAX; // If fast_tx_search is true, only DCT and 1D DCT were tested in @@ -5125,9 +5110,6 @@ } int64_t rd; - const int skip_ctx = av1_get_skip_context(xd); - const int s0 = x->skip_cost[skip_ctx][0]; - const int s1 = x->skip_cost[skip_ctx][1]; if (rd_stats->skip) { rd = RDCOST(x->rdmult, s1, rd_stats->sse); #if CONFIG_ONE_PASS_SVM