/* av1/encoder/rdopt_utils.h - avm - Git at Google */

 /*
  * Copyright (c) 2021, Alliance for Open Media. All rights reserved
  *
  * This source code is subject to the terms of the BSD 3-Clause Clear License
  * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
  * License was not distributed with this source code in the LICENSE file, you
  * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
  * Alliance for Open Media Patent License 1.0 was not distributed with this
  * source code in the PATENTS file, you can obtain it at
  * aomedia.org/license/patent-license/.
  */

 #ifndef AOM_AV1_ENCODER_RDOPT_UTILS_H_
 #define AOM_AV1_ENCODER_RDOPT_UTILS_H_

 #include "aom/aom_integer.h"
 #include "av1/encoder/block.h"
 #include "av1/common/cfl.h"
 #include "av1/common/pred_common.h"

 #ifdef __cplusplus
 extern "C" {
 #endif

 #define MAX_REF_MV_SEARCH (MAX_REF_MV_STACK_SIZE)
 #define INTER_INTRA_RD_THRESH_SCALE 9
 #define INTER_INTRA_RD_THRESH_SHIFT 4

 // Pairs a prediction mode with two reference-frame slots.
 typedef struct {
   // Prediction mode of this entry.
   PREDICTION_MODE mode;
   // Reference frames associated with the mode (two slots).
   MV_REFERENCE_FRAME ref_frame[2];
 } MODE_DEFINITION;

 // Points the destination buffer (pointer and stride) of the first
 // |num_planes| planes of |xd| at the buffers recorded in |dst|.
 static AOM_INLINE void restore_dst_buf(MACROBLOCKD *xd, const BUFFER_SET dst,
                                        const int num_planes) {
   int plane = 0;
   while (plane < num_planes) {
     struct macroblockd_plane *const pd = &xd->plane[plane];
     pd->dst.buf = dst.plane[plane];
     pd->dst.stride = dst.stride[plane];
     ++plane;
   }
 }

 /* clang-format on */
 // Calculate rd threshold based on ref best rd and relevant scaling factors.
 //
 // Returns (ref_best_rd / div_factor) * mul_factor, saturated to INT64_MAX when
 // the product would not fit in an int64_t.
 //   - div_factor == 0: no scaling is applied and ref_best_rd is returned.
 //   - mul_factor == 0: the scaled value is 0.
 // Both factors are expected to be non-negative.
 //
 // The saturation test is done on the quotient (q < INT64_MAX / mul_factor)
 // rather than on div_factor * (INT64_MAX / mul_factor): the latter overflows
 // a signed 64-bit integer whenever div_factor > mul_factor and divides by
 // zero when mul_factor == 0. For positive factors with
 // div_factor <= mul_factor both forms select the same branch.
 static AOM_INLINE int64_t get_rd_thresh_from_best_rd(int64_t ref_best_rd,
                                                      int mul_factor,
                                                      int div_factor) {
   if (div_factor == 0) return ref_best_rd;
   if (mul_factor == 0) return 0;

   const int64_t scaled_down = ref_best_rd / div_factor;
   if (scaled_down < INT64_MAX / mul_factor) return scaled_down * mul_factor;
   return INT64_MAX;
 }

 // Returns -1 for BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_4X16 and BLOCK_16X4,
 // and 1 for every other block size.
 static AOM_INLINE int inter_mode_data_block_idx(BLOCK_SIZE bsize) {
   switch (bsize) {
     case BLOCK_4X4:
     case BLOCK_4X8:
     case BLOCK_8X4:
     case BLOCK_4X16:
     case BLOCK_16X4: return -1;
     default: return 1;
   }
 }

 // Get transform block visible dimensions cropped to the MI units.
 //   plane_bsize:    size of the enclosing block in |plane|.
 //   blk_row/col:    transform block position; scaled by MI_SIZE_LOG2 here.
 //   tx_bsize:       transform block size; asserted not to exceed plane_bsize.
 //   width/height:   optional outputs (may be NULL) for the full txb size.
 //   visible_width/visible_height: required outputs; the txb size clamped to
 //                   [0, full size] using xd->mb_to_right_edge and
 //                   xd->mb_to_bottom_edge when those are negative.
 static AOM_INLINE void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
                                           BLOCK_SIZE plane_bsize, int blk_row,
                                           int blk_col, BLOCK_SIZE tx_bsize,
                                           int *width, int *height,
                                           int *visible_width,
                                           int *visible_height) {
   const struct macroblockd_plane *const pd = &xd->plane[plane];
   const int full_h = block_size_high[tx_bsize];
   const int full_w = block_size_wide[tx_bsize];
   assert(full_h <= block_size_high[plane_bsize]);
   assert(full_w <= block_size_wide[plane_bsize]);

   // TODO(aconverse@google.com): Investigate using crop_width/height here rather
   // than the MI size
   int shown_h = full_h;
   if (xd->mb_to_bottom_edge < 0) {
     const int rows_left = (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) +
                           block_size_high[plane_bsize];
     shown_h = clamp(rows_left - (blk_row << MI_SIZE_LOG2), 0, full_h);
   }
   *visible_height = shown_h;
   if (height) *height = full_h;

   int shown_w = full_w;
   if (xd->mb_to_right_edge < 0) {
     const int cols_left = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) +
                           block_size_wide[plane_bsize];
     shown_w = clamp(cols_left - (blk_col << MI_SIZE_LOG2), 0, full_w);
   }
   *visible_width = shown_w;
   if (width) *width = full_w;
 }

 // Returns 1 << (num_pels_log2_lookup[bsize] - 2 * MI_SIZE_LOG2).
 static AOM_INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
   return 1 << (num_pels_log2_lookup[bsize] - 2 * MI_SIZE_LOG2);
 }

 // Gate for the transform search. Returns 1 when |skip_rd| does not exceed a
 // threshold derived from |best_skip_rd|, and 0 otherwise.
 //
 // The threshold is (best_skip_rd * aggr_factor * mul_factor) >> 4; a larger
 // aggr_factor is more conservative (fewer candidates are rejected):
 //   - unless |is_luma_only|, aggr_factor starts from a qindex-based value of
 //     at least 1 that grows as x->qindex decreases;
 //   - when |best_skip_rd| exceeds the source variance shifted by the block's
 //     log2 pixel count plus RDDIV_BITS (poor prediction), it is multiplied by
 //     a per-|level| scale;
 //   - otherwise, for |level| <= 1 and not |is_luma_only|, it is doubled.
 // A |best_skip_rd| of INT64_MAX is used as the threshold unchanged.
 // |level| indexes 5-entry tables, so it must lie in [0, 4].
 // NOTE(review): entry 0 of both tables is INT_MAX; presumably level 0 never
 // reaches the multiplications below -- confirm against callers.
 static INLINE int check_txfm_eval(MACROBLOCK *const x, BLOCK_SIZE bsize,
                                   int64_t best_skip_rd, int64_t skip_rd,
                                   int level, int is_luma_only) {
   const int level_scale[5] = { INT_MAX, 4, 3, 3, 2 };
   // Be more conservative for luma only cases (called from compound type rd)
   // since best_skip_rd is computed after and skip_rd is computed (with 8-bit
   // prediction signals blended for WEDGE/DIFFWTD rather than 16-bit) before
   // interpolation filter search
   const int luma_only_mul[5] = { INT_MAX, 32, 29, 20, 17 };

   int aggr_factor = 1;
   if (!is_luma_only) {
     // The quantizer slope is 2 whenever this branch is taken.
     aggr_factor = AOMMAX(
         1, ((MAXQ - x->qindex) * 2 + QINDEX_RANGE / 2) >> QINDEX_BITS);
   }

   if (best_skip_rd >
       (x->source_variance << (num_pels_log2_lookup[bsize] + RDDIV_BITS))) {
     aggr_factor *= level_scale[level];
   } else if ((level <= 1) && !is_luma_only) {
     aggr_factor *= 2;
   }

   int64_t rd_thresh = best_skip_rd;
   if (best_skip_rd != INT64_MAX) {
     const int mul_factor = is_luma_only ? luma_only_mul[level] : 16;
     rd_thresh = (int64_t)(best_skip_rd * aggr_factor * mul_factor >> 4);
   }
   return skip_rd <= rd_thresh;
 }

 // Maps a transform size search method to the transform mode used for the
 // search: ONLY_4X4 when the frame is coded lossless, TX_MODE_LARGEST for
 // USE_LARGESTALL, and TX_MODE_SELECT for the remaining methods (asserted to
 // be USE_FULL_RD or USE_FAST_RD).
 static TX_MODE select_tx_mode(
     const AV1_COMMON *cm, const TX_SIZE_SEARCH_METHOD tx_size_search_method) {
   if (cm->features.coded_lossless) return ONLY_4X4;
   if (tx_size_search_method == USE_LARGESTALL) return TX_MODE_LARGEST;

   assert(tx_size_search_method == USE_FULL_RD ||
          tx_size_search_method == USE_FAST_RD);
   return TX_MODE_SELECT;
 }
 // Checks the conditions to enable winner mode processing.
 // Returns 1 when any of the following holds, 0 otherwise:
 //   - the fast transform type search speed feature matching the block kind
 //     (inter/intra) is on and the corresponding "DCT only" / "default tx
 //     only" encoder options are off (inter blocks additionally need
 //     |best_mode| to be an inter mode);
 //   - winner mode coefficient optimization is enabled and the segment's
 //     trellis setting is neither NO_TRELLIS_OPT nor FINAL_PASS_TRELLIS_OPT;
 //   - winner mode transform size search is enabled.
 static INLINE int is_winner_mode_processing_enabled(
     const struct AV1_COMP *cpi, MB_MODE_INFO *const mbmi,
     const PREDICTION_MODE best_mode) {
   const SPEED_FEATURES *sf = &cpi->sf;

   // TODO(any): Move block independent condition checks to frame level
   int fast_tx_type_search;
   if (is_inter_block(mbmi, SHARED_PART)) {
     fast_tx_type_search = is_inter_mode(best_mode) &&
                           sf->tx_sf.tx_type_search.fast_inter_tx_type_search &&
                           !cpi->oxcf.txfm_cfg.use_inter_dct_only;
   } else {
     fast_tx_type_search = sf->tx_sf.tx_type_search.fast_intra_tx_type_search &&
                           !cpi->oxcf.txfm_cfg.use_intra_default_tx_only &&
                           !cpi->oxcf.txfm_cfg.use_intra_dct_only;
   }
   if (fast_tx_type_search) return 1;

   // Speed features tied directly to winner mode processing.
   if (sf->winner_mode_sf.enable_winner_mode_for_coeff_opt &&
       !(cpi->optimize_seg_arr[mbmi->segment_id] == NO_TRELLIS_OPT ||
         cpi->optimize_seg_arr[mbmi->segment_id] == FINAL_PASS_TRELLIS_OPT)) {
     return 1;
   }

   return sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch ? 1 : 0;
 }

 // Fills in txfm_params->tx_size_search_method and
 // txfm_params->tx_mode_search_type.
 // The search method is taken from |winner_mode_params| at DEFAULT_EVAL when
 // |enable_winner_mode_for_tx_size_srch| is 0, otherwise at WINNER_MODE_EVAL
 // or MODE_EVAL depending on |is_winner_mode|. With
 // CONFIG_EXT_RECUR_PARTITIONS the method is overridden to USE_LARGESTALL
 // when all of the conditions checked below hold. The transform mode is then
 // derived with select_tx_mode().
 static INLINE void set_tx_size_search_method(
     const AV1_COMMON *cm, const WinnerModeParams *winner_mode_params,
     TxfmSearchParams *txfm_params, int enable_winner_mode_for_tx_size_srch,
     int is_winner_mode
 #if CONFIG_EXT_RECUR_PARTITIONS
     ,
     const MACROBLOCK *x, bool use_largest_tx_size_for_small_bsize
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 ) {
   int eval_stage = DEFAULT_EVAL;
   if (enable_winner_mode_for_tx_size_srch) {
     eval_stage = is_winner_mode ? WINNER_MODE_EVAL : MODE_EVAL;
   }
   txfm_params->tx_size_search_method =
       winner_mode_params->tx_size_search_methods[eval_stage];

 #if CONFIG_EXT_RECUR_PARTITIONS
   const BLOCK_SIZE bsize = x->e_mbd.mi[0]->sb_type[0];
   const int force_largest =
       !frame_is_intra_only(cm) &&
       x->sb_enc.min_partition_size == BLOCK_4X4 &&
       use_largest_tx_size_for_small_bsize && is_bsize_geq(BLOCK_16X16, bsize);
   if (force_largest) txfm_params->tx_size_search_method = USE_LARGESTALL;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS

   txfm_params->tx_mode_search_type =
       select_tx_mode(cm, txfm_params->tx_size_search_method);
 }

 // Sets txfm_params->prune_2d_txfm_mode.
 // With |winner_mode_tx_type_pruning| == 0 the value comes straight from the
 // speed features. Otherwise it is looked up in a 2x2 table indexed by
 // (winner_mode_tx_type_pruning - 1, is_winner_mode), so the pruning level
 // must be 1 or 2 and |is_winner_mode| must be 0 or 1.
 static INLINE void set_tx_type_prune(const SPEED_FEATURES *sf,
                                      TxfmSearchParams *txfm_params,
                                      int winner_mode_tx_type_pruning,
                                      int is_winner_mode) {
   if (winner_mode_tx_type_pruning == 0) {
     txfm_params->prune_2d_txfm_mode =
         sf->tx_sf.tx_type_search.prune_2d_txfm_mode;
     return;
   }

   // Rows: pruning level 1, 2. Columns: mode evaluation, winner mode.
   const int prune_by_level[2][2] = { { TX_TYPE_PRUNE_4, TX_TYPE_PRUNE_0 },
                                      { TX_TYPE_PRUNE_5, TX_TYPE_PRUNE_2 } };
   txfm_params->prune_2d_txfm_mode =
       prune_by_level[winner_mode_tx_type_pruning - 1][is_winner_mode];
 }

 // Copies the transform-domain distortion flag and threshold from
 // |winner_mode_params| into |txfm_params|. The DEFAULT_EVAL entries are used
 // when |enable_winner_mode_for_tx_domain_dist| is 0; otherwise the
 // WINNER_MODE_EVAL or MODE_EVAL entries are used according to
 // |is_winner_mode|.
 static INLINE void set_tx_domain_dist_params(
     const WinnerModeParams *winner_mode_params, TxfmSearchParams *txfm_params,
     int enable_winner_mode_for_tx_domain_dist, int is_winner_mode) {
   int eval_stage = DEFAULT_EVAL;
   if (enable_winner_mode_for_tx_domain_dist) {
     eval_stage = is_winner_mode ? WINNER_MODE_EVAL : MODE_EVAL;
   }

   txfm_params->use_transform_domain_distortion =
       winner_mode_params->use_transform_domain_distortion[eval_stage];
   txfm_params->tx_domain_dist_threshold =
       winner_mode_params->tx_domain_dist_threshold[eval_stage];
 }

 // This function sets mode parameters for different mode evaluation stages.
 //
 // For every valid |mode_eval_type| it fills, in x->txfm_search_params: the
 // default inter/intra tx type flags, skip_txfm_level, predict_dc_level, the
 // transform-domain distortion settings, the coefficient optimization
 // thresholds, the transform size search method / mode and the 2D transform
 // prune mode.
 //   DEFAULT_EVAL:     all winner-mode speed features are treated as off.
 //   MODE_EVAL:        winner-mode speed features apply, non-winner variant.
 //   WINNER_MODE_EVAL: winner-mode speed features apply, winner variant; the
 //                     transform hash records are reset afterwards.
 // Any other value asserts and leaves |x| untouched.
 static INLINE void set_mode_eval_params(const struct AV1_COMP *cpi,
                                         MACROBLOCK *x,
                                         MODE_EVAL_TYPE mode_eval_type) {
   const AV1_COMMON *cm = &cpi->common;
   const SPEED_FEATURES *sf = &cpi->sf;
   const WinnerModeParams *winner_mode_params = &cpi->winner_mode_params;
   TxfmSearchParams *txfm_params = &x->txfm_search_params;
   TxfmSearchInfo *txfm_info = &x->txfm_search_info;

   // Whether the winner-mode speed features are consulted at this stage, and
   // whether this stage is the winner mode evaluation itself.
   int use_winner_sf = 0;
   int is_winner_mode = 0;

   switch (mode_eval_type) {
     case DEFAULT_EVAL:
       txfm_params->use_default_inter_tx_type = 0;
       txfm_params->use_default_intra_tx_type = 0;
       break;
     case MODE_EVAL:
       txfm_params->use_default_intra_tx_type =
           (cpi->sf.tx_sf.tx_type_search.fast_intra_tx_type_search ||
            cpi->oxcf.txfm_cfg.use_intra_default_tx_only);
       txfm_params->use_default_inter_tx_type =
           cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_search;
       use_winner_sf = 1;
       break;
     case WINNER_MODE_EVAL:
       txfm_params->use_default_inter_tx_type = 0;
       txfm_params->use_default_intra_tx_type = 0;
       use_winner_sf = 1;
       is_winner_mode = 1;
       break;
     default: assert(0); return;
   }

   const int tx_domain_dist_flag =
       use_winner_sf
           ? sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist
           : 0;
   const int coeff_opt_flag =
       use_winner_sf ? sf->winner_mode_sf.enable_winner_mode_for_coeff_opt : 0;
   const int tx_size_srch_flag =
       use_winner_sf ? sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch
                     : 0;
   const int tx_type_prune_level =
       use_winner_sf ? sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning : 0;

   txfm_params->skip_txfm_level =
       winner_mode_params->skip_txfm_level[mode_eval_type];
   txfm_params->predict_dc_level =
       winner_mode_params->predict_dc_level[mode_eval_type];

   // Transform domain distortion type for this stage
   set_tx_domain_dist_params(winner_mode_params, txfm_params,
                             tx_domain_dist_flag, is_winner_mode);

   // Thresholds for R-D optimization of coefficients for this stage
   txfm_params->coeff_opt_dist_threshold = get_rd_opt_coeff_thresh(
       winner_mode_params->coeff_opt_dist_threshold, coeff_opt_flag,
       is_winner_mode);
   txfm_params->coeff_opt_satd_threshold = get_rd_opt_coeff_thresh(
       winner_mode_params->coeff_opt_satd_threshold, coeff_opt_flag,
       is_winner_mode);

   // Transform size search method for this stage
   set_tx_size_search_method(cm, winner_mode_params, txfm_params,
                             tx_size_srch_flag, is_winner_mode
 #if CONFIG_EXT_RECUR_PARTITIONS
                             ,
                             x, sf->tx_sf.use_largest_tx_size_for_small_bsize
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
   );

   // Transform type prune mode for this stage
   set_tx_type_prune(sf, txfm_params, tx_type_prune_level, is_winner_mode);

   if (mode_eval_type == WINNER_MODE_EVAL) {
     // Reset hash state for winner mode processing. Winner mode and subsequent
     // transform/mode evaluations (palette/IntraBC) can't reuse old data as
     // the decisions would have been sub-optimal
     // TODO(any): Move the evaluation of palette/IntraBC modes before winner
     // mode is processed and clean-up the code below
     reset_hash_records(txfm_info, cpi->sf.tx_sf.use_inter_txb_hash);
   }
 }

 // Similar to store_cfl_required(), but for use during the RDO process,
 // where we haven't yet determined whether this block uses CfL.
 //
 // Returns CFL_DISALLOWED for monochrome sequences and for blocks that are not
 // chroma references; otherwise returns is_cfl_allowed(xd).
 //
 // NOTE(review): this function previously carried a second
 // `if (!xd->is_chroma_ref) return CFL_ALLOWED;` branch after the first check,
 // commented as "always store the luma pixels for non-chroma-reference
 // blocks". That branch could never execute because the first check already
 // returns CFL_DISALLOWED for such blocks, so it has been removed. The
 // effective behaviour (CFL_DISALLOWED) is kept; confirm that this, and not
 // the removed branch, is the intended policy.
 static INLINE CFL_ALLOWED_TYPE store_cfl_required_rdo(const AV1_COMMON *cm,
                                                       const MACROBLOCK *x) {
   const MACROBLOCKD *xd = &x->e_mbd;

   if (cm->seq_params.monochrome || !xd->is_chroma_ref) return CFL_DISALLOWED;

   // For chroma reference blocks, we should store data in the encoder iff we're
   // allowed to try out CfL.
   return is_cfl_allowed(xd);
 }

 // Resets the chroma-related fields of |mbmi|: palette_size[1] is cleared to 0
 // and uv_mode is set to UV_DC_PRED.
 static AOM_INLINE void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
   mbmi->palette_mode_info.palette_size[1] = 0;
   mbmi->uv_mode = UV_DC_PRED;
 }

 // Store best mode stats for winner mode processing.
 //
 // Inserts the candidate described by |mbmi| / |mode| / |refs| / |this_rd| into
 // x->winner_mode_stats, which is kept ordered by increasing rd cost, and
 // updates x->winner_mode_count (capped at the maximum for the frame type, or
 // at 2 for MULTI_WINNER_MODE_FAST). Nothing is stored when:
 //   - |multi_winner_mode_type| is MULTI_WINNER_MODE_OFF,
 //   - |this_rd| is INT64_MAX,
 //   - the mode is a palette mode on a non-intra-only frame, or
 //   - the list is full and no stored entry has a higher rd cost.
 // On non-intra-only frames, when |rd_cost|, |rd_cost_y| and |rd_cost_uv| are
 // all non-NULL, the rd stats are stored too; the luma/chroma rates are only
 // stored when |txfm_search_done| is set. A non-NULL |color_map| is copied
 // using the luma block dimensions for |bsize|.
 static INLINE void store_winner_mode_stats(
     const AV1_COMMON *const cm, MACROBLOCK *x, const MB_MODE_INFO *mbmi,
     RD_STATS *rd_cost, RD_STATS *rd_cost_y, RD_STATS *rd_cost_uv,
     const MV_REFERENCE_FRAME *refs, PREDICTION_MODE mode, uint8_t *color_map,
     BLOCK_SIZE bsize, int64_t this_rd, int multi_winner_mode_type,
     int txfm_search_done) {
   WinnerModeStats *const stats = x->winner_mode_stats;
   const int uses_palette =
       mbmi->palette_mode_info.palette_size[PLANE_TYPE_Y] > 0;

   // Mode stat is not required when multiwinner mode processing is disabled
   if (multi_winner_mode_type == MULTI_WINNER_MODE_OFF) return;
   // Ignore mode with maximum rd
   if (this_rd == INT64_MAX) return;

   const int intra_only = frame_is_intra_only(cm);
   // TODO(any): Winner mode processing is currently not applicable for palette
   // mode in Inter frames. Clean-up the following code, once support is added
   if (!intra_only && uses_palette) return;

   int capacity =
       intra_only ? MAX_WINNER_MODE_COUNT_INTRA : MAX_WINNER_MODE_COUNT_INTER;
   if (multi_winner_mode_type == MULTI_WINNER_MODE_FAST) {
     capacity = AOMMIN(capacity, 2);
   }
   assert(x->winner_mode_count >= 0 && x->winner_mode_count <= capacity);

   int slot = 0;
   if (x->winner_mode_count) {
     // Skip the stored modes whose rd cost is not higher than this_rd.
     while (slot < x->winner_mode_count && stats[slot].rd <= this_rd) ++slot;

     // Every slot is taken by a mode that is at least as good.
     if (slot == capacity) return;

     if (slot < capacity - 1) {
       // Open a gap at |slot| by shifting the remaining entries down by one.
       memmove(&stats[slot + 1], &stats[slot],
               (capacity - slot - 1) * sizeof(*stats));
     }
   }

   // Record the candidate in its slot.
   WinnerModeStats *const entry = &stats[slot];
   entry->mbmi = *mbmi;
   entry->rd = this_rd;
   entry->mode = mode;
   entry->refs[0] = refs[0];
   entry->refs[1] = refs[1];

   // Update rd stats required for inter frame
   if (!intra_only && rd_cost && rd_cost_y && rd_cost_uv) {
     const MACROBLOCKD *xd = &x->e_mbd;
     const int skip_ctx = av1_get_skip_txfm_context(xd);
     const int is_intra_mode = mode < INTRA_MODE_END;
     const int skip_txfm =
         mbmi->skip_txfm[xd->tree_type == CHROMA_PART] && !is_intra_mode;

     entry->rd_cost = *rd_cost;
     if (txfm_search_done) {
       const int skip_flag = rd_cost->skip_txfm || skip_txfm;
       entry->rate_y = rd_cost_y->rate +
                       x->mode_costs.skip_txfm_cost[skip_ctx][skip_flag];
       entry->rate_uv = rd_cost_uv->rate;
     }
   }

   if (color_map) {
     // Store color_index_map for palette mode
     const MACROBLOCKD *const xd = &x->e_mbd;
     int block_width, block_height;
     av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width,
                              &block_height, NULL, NULL);
     memcpy(entry->color_index_map, color_map,
            block_width * block_height * sizeof(color_map[0]));
   }

   x->winner_mode_count = AOMMIN(x->winner_mode_count + 1, capacity);
 }

 // Declaration only; the definition is not part of this header.
 // NOTE(review): judging by the name and parameters this presumably returns a
 // per-pixel variance of the block of size |bs| in |ref| at bit depth |bd| --
 // confirm against the implementation.
 unsigned int av1_high_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
                                                 const struct buf_2d *ref,
                                                 BLOCK_SIZE bs, int bd);

 #ifdef __cplusplus
 }  // extern "C"
 #endif

 #endif  // AOM_AV1_ENCODER_RDOPT_UTILS_H_
	/*
	* Copyright (c) 2021, Alliance for Open Media. All rights reserved
	*
	* This source code is subject to the terms of the BSD 3-Clause Clear License
	* and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
	* License was not distributed with this source code in the LICENSE file, you
	* can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the
	* Alliance for Open Media Patent License 1.0 was not distributed with this
	* source code in the PATENTS file, you can obtain it at
	* aomedia.org/license/patent-license/.
	*/

	#ifndef AOM_AV1_ENCODER_RDOPT_UTILS_H_
	#define AOM_AV1_ENCODER_RDOPT_UTILS_H_

	#include "aom/aom_integer.h"
	#include "av1/encoder/block.h"
	#include "av1/common/cfl.h"
	#include "av1/common/pred_common.h"

	#ifdef __cplusplus
	extern "C" {
	#endif

	#define MAX_REF_MV_SEARCH (MAX_REF_MV_STACK_SIZE)
	#define INTER_INTRA_RD_THRESH_SCALE 9
	#define INTER_INTRA_RD_THRESH_SHIFT 4

	// Pairs a prediction mode with two reference-frame slots.
	typedef struct {
	// Prediction mode of this entry.
	PREDICTION_MODE mode;
	// Reference frames associated with the mode (two slots).
	MV_REFERENCE_FRAME ref_frame[2];
	} MODE_DEFINITION;

	// Points the destination buffer (pointer and stride) of the first
	// |num_planes| planes of |xd| at the buffers recorded in |dst|.
	static AOM_INLINE void restore_dst_buf(MACROBLOCKD *xd, const BUFFER_SET dst,
	                                       const int num_planes) {
	  int plane = 0;
	  while (plane < num_planes) {
	    xd->plane[plane].dst.buf = dst.plane[plane];
	    xd->plane[plane].dst.stride = dst.stride[plane];
	    ++plane;
	  }
	}

	/* clang-format on */
	// Calculate rd threshold based on ref best rd and relevant scaling factors.
	//
	// Returns (ref_best_rd / div_factor) * mul_factor, saturated to INT64_MAX
	// when the product would not fit in an int64_t.
	//   - div_factor == 0: no scaling is applied and ref_best_rd is returned.
	//   - mul_factor == 0: the scaled value is 0.
	// Both factors are expected to be non-negative.
	//
	// The saturation test is done on the quotient (q < INT64_MAX / mul_factor)
	// rather than on div_factor * (INT64_MAX / mul_factor): the latter overflows
	// a signed 64-bit integer whenever div_factor > mul_factor and divides by
	// zero when mul_factor == 0. For positive factors with
	// div_factor <= mul_factor both forms select the same branch.
	static AOM_INLINE int64_t get_rd_thresh_from_best_rd(int64_t ref_best_rd,
	                                                     int mul_factor,
	                                                     int div_factor) {
	  if (div_factor == 0) return ref_best_rd;
	  if (mul_factor == 0) return 0;

	  const int64_t scaled_down = ref_best_rd / div_factor;
	  if (scaled_down < INT64_MAX / mul_factor) return scaled_down * mul_factor;
	  return INT64_MAX;
	}

	// Returns -1 for BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_4X16 and BLOCK_16X4,
	// and 1 for every other block size.
	// (The logical-or operators in this copy had been escaped as "\|\|", which
	// is not valid C; a switch expresses the same set test.)
	static AOM_INLINE int inter_mode_data_block_idx(BLOCK_SIZE bsize) {
	  switch (bsize) {
	    case BLOCK_4X4:
	    case BLOCK_4X8:
	    case BLOCK_8X4:
	    case BLOCK_4X16:
	    case BLOCK_16X4: return -1;
	    default: return 1;
	  }
	}

	// Get transform block visible dimensions cropped to the MI units.
	//   plane_bsize:    size of the enclosing block in |plane|.
	//   blk_row/col:    transform block position; scaled by MI_SIZE_LOG2 here.
	//   tx_bsize:       transform block size; asserted not to exceed plane_bsize.
	//   width/height:   optional outputs (may be NULL) for the full txb size.
	//                   These are pointers: the body writes through them.
	//   visible_width/visible_height: required outputs; the txb size clamped to
	//                   [0, full size] using xd->mb_to_right_edge and
	//                   xd->mb_to_bottom_edge when those are negative.
	static AOM_INLINE void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
	                                          BLOCK_SIZE plane_bsize, int blk_row,
	                                          int blk_col, BLOCK_SIZE tx_bsize,
	                                          int *width, int *height,
	                                          int *visible_width,
	                                          int *visible_height) {
	  const struct macroblockd_plane *const pd = &xd->plane[plane];
	  const int full_h = block_size_high[tx_bsize];
	  const int full_w = block_size_wide[tx_bsize];
	  assert(full_h <= block_size_high[plane_bsize]);
	  assert(full_w <= block_size_wide[plane_bsize]);

	  // TODO(aconverse@google.com): Investigate using crop_width/height here
	  // rather than the MI size
	  int shown_h = full_h;
	  if (xd->mb_to_bottom_edge < 0) {
	    const int rows_left = (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) +
	                          block_size_high[plane_bsize];
	    shown_h = clamp(rows_left - (blk_row << MI_SIZE_LOG2), 0, full_h);
	  }
	  *visible_height = shown_h;
	  if (height) *height = full_h;

	  int shown_w = full_w;
	  if (xd->mb_to_right_edge < 0) {
	    const int cols_left = (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) +
	                          block_size_wide[plane_bsize];
	    shown_w = clamp(cols_left - (blk_col << MI_SIZE_LOG2), 0, full_w);
	  }
	  *visible_width = shown_w;
	  if (width) *width = full_w;
	}

	// Returns 1 << (num_pels_log2_lookup[bsize] - 2 * MI_SIZE_LOG2).
	static AOM_INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
	  return 1 << (num_pels_log2_lookup[bsize] - 2 * MI_SIZE_LOG2);
	}

	// Gate for the transform search. Returns 1 when |skip_rd| does not exceed a
	// threshold derived from |best_skip_rd|, and 0 otherwise.
	//
	// The threshold is (best_skip_rd * aggr_factor * mul_factor) >> 4; a larger
	// aggr_factor is more conservative (fewer candidates are rejected):
	//   - unless |is_luma_only|, aggr_factor starts from a qindex-based value of
	//     at least 1 that grows as x->qindex decreases;
	//   - when |best_skip_rd| exceeds the source variance shifted by the block's
	//     log2 pixel count plus RDDIV_BITS (poor prediction), it is multiplied
	//     by a per-|level| scale;
	//   - otherwise, for |level| <= 1 and not |is_luma_only|, it is doubled.
	// A |best_skip_rd| of INT64_MAX is used as the threshold unchanged.
	// |level| indexes 5-entry tables, so it must lie in [0, 4].
	// NOTE(review): entry 0 of both tables is INT_MAX; presumably level 0 never
	// reaches the multiplications below -- confirm against callers.
	static INLINE int check_txfm_eval(MACROBLOCK *const x, BLOCK_SIZE bsize,
	                                  int64_t best_skip_rd, int64_t skip_rd,
	                                  int level, int is_luma_only) {
	  const int level_scale[5] = { INT_MAX, 4, 3, 3, 2 };
	  // Be more conservative for luma only cases (called from compound type rd)
	  // since best_skip_rd is computed after and skip_rd is computed (with 8-bit
	  // prediction signals blended for WEDGE/DIFFWTD rather than 16-bit) before
	  // interpolation filter search
	  const int luma_only_mul[5] = { INT_MAX, 32, 29, 20, 17 };

	  int aggr_factor = 1;
	  if (!is_luma_only) {
	    // The quantizer slope is 2 whenever this branch is taken.
	    aggr_factor = AOMMAX(
	        1, ((MAXQ - x->qindex) * 2 + QINDEX_RANGE / 2) >> QINDEX_BITS);
	  }

	  if (best_skip_rd >
	      (x->source_variance << (num_pels_log2_lookup[bsize] + RDDIV_BITS))) {
	    aggr_factor *= level_scale[level];
	  } else if ((level <= 1) && !is_luma_only) {
	    aggr_factor *= 2;
	  }

	  int64_t rd_thresh = best_skip_rd;
	  if (best_skip_rd != INT64_MAX) {
	    const int mul_factor = is_luma_only ? luma_only_mul[level] : 16;
	    rd_thresh = (int64_t)(best_skip_rd * aggr_factor * mul_factor >> 4);
	  }
	  return skip_rd <= rd_thresh;
	}

	static TX_MODE select_tx_mode(
	const AV1_COMMON *cm, const TX_SIZE_SEARCH_METHOD tx_size_search_method) {
	if (cm->features.coded_lossless) return ONLY_4X4;
	if (tx_size_search_method == USE_LARGESTALL) {
	return TX_MODE_LARGEST;
	} else {
	assert(tx_size_search_method == USE_FULL_RD \|\|
	tx_size_search_method == USE_FAST_RD);
	return TX_MODE_SELECT;
	}
	}
	// Checks the conditions to enable winner mode processing.
	// Returns 1 when any of the following holds, 0 otherwise:
	//   - the fast transform type search speed feature matching the block kind
	//     (inter/intra) is on and the corresponding "DCT only" / "default tx
	//     only" encoder options are off (inter blocks additionally need
	//     |best_mode| to be an inter mode);
	//   - winner mode coefficient optimization is enabled and the segment's
	//     trellis setting is neither NO_TRELLIS_OPT nor FINAL_PASS_TRELLIS_OPT;
	//   - winner mode transform size search is enabled.
	// |cpi| and |mbmi| are pointers: the body dereferences both with "->".
	static INLINE int is_winner_mode_processing_enabled(
	    const struct AV1_COMP *cpi, MB_MODE_INFO *const mbmi,
	    const PREDICTION_MODE best_mode) {
	  const SPEED_FEATURES *sf = &cpi->sf;

	  // TODO(any): Move block independent condition checks to frame level
	  int fast_tx_type_search;
	  if (is_inter_block(mbmi, SHARED_PART)) {
	    fast_tx_type_search = is_inter_mode(best_mode) &&
	                          sf->tx_sf.tx_type_search.fast_inter_tx_type_search &&
	                          !cpi->oxcf.txfm_cfg.use_inter_dct_only;
	  } else {
	    fast_tx_type_search = sf->tx_sf.tx_type_search.fast_intra_tx_type_search &&
	                          !cpi->oxcf.txfm_cfg.use_intra_default_tx_only &&
	                          !cpi->oxcf.txfm_cfg.use_intra_dct_only;
	  }
	  if (fast_tx_type_search) return 1;

	  // Speed features tied directly to winner mode processing.
	  if (sf->winner_mode_sf.enable_winner_mode_for_coeff_opt &&
	      !(cpi->optimize_seg_arr[mbmi->segment_id] == NO_TRELLIS_OPT ||
	        cpi->optimize_seg_arr[mbmi->segment_id] == FINAL_PASS_TRELLIS_OPT)) {
	    return 1;
	  }

	  return sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch ? 1 : 0;
	}

	// Fills in txfm_params->tx_size_search_method and
	// txfm_params->tx_mode_search_type.
	// The search method is taken from |winner_mode_params| at DEFAULT_EVAL when
	// |enable_winner_mode_for_tx_size_srch| is 0, otherwise at WINNER_MODE_EVAL
	// or MODE_EVAL depending on |is_winner_mode|. With
	// CONFIG_EXT_RECUR_PARTITIONS the method is overridden to USE_LARGESTALL
	// when all of the conditions checked below hold. The transform mode is then
	// derived with select_tx_mode().
	// |cm| and |winner_mode_params| are pointers: the body uses "->" on
	// |winner_mode_params| and forwards |cm| to helpers taking a pointer.
	static INLINE void set_tx_size_search_method(
	    const AV1_COMMON *cm, const WinnerModeParams *winner_mode_params,
	    TxfmSearchParams *txfm_params, int enable_winner_mode_for_tx_size_srch,
	    int is_winner_mode
	#if CONFIG_EXT_RECUR_PARTITIONS
	    ,
	    const MACROBLOCK *x, bool use_largest_tx_size_for_small_bsize
	#endif  // CONFIG_EXT_RECUR_PARTITIONS
	) {
	  int eval_stage = DEFAULT_EVAL;
	  if (enable_winner_mode_for_tx_size_srch) {
	    eval_stage = is_winner_mode ? WINNER_MODE_EVAL : MODE_EVAL;
	  }
	  txfm_params->tx_size_search_method =
	      winner_mode_params->tx_size_search_methods[eval_stage];

	#if CONFIG_EXT_RECUR_PARTITIONS
	  const BLOCK_SIZE bsize = x->e_mbd.mi[0]->sb_type[0];
	  const int force_largest =
	      !frame_is_intra_only(cm) &&
	      x->sb_enc.min_partition_size == BLOCK_4X4 &&
	      use_largest_tx_size_for_small_bsize && is_bsize_geq(BLOCK_16X16, bsize);
	  if (force_largest) txfm_params->tx_size_search_method = USE_LARGESTALL;
	#endif  // CONFIG_EXT_RECUR_PARTITIONS

	  txfm_params->tx_mode_search_type =
	      select_tx_mode(cm, txfm_params->tx_size_search_method);
	}

	// Sets txfm_params->prune_2d_txfm_mode.
	// With |winner_mode_tx_type_pruning| == 0 the value comes straight from the
	// speed features. Otherwise it is looked up in a 2x2 table indexed by
	// (winner_mode_tx_type_pruning - 1, is_winner_mode), so the pruning level
	// must be 1 or 2 and |is_winner_mode| must be 0 or 1.
	static INLINE void set_tx_type_prune(const SPEED_FEATURES *sf,
	                                     TxfmSearchParams *txfm_params,
	                                     int winner_mode_tx_type_pruning,
	                                     int is_winner_mode) {
	  if (winner_mode_tx_type_pruning == 0) {
	    txfm_params->prune_2d_txfm_mode =
	        sf->tx_sf.tx_type_search.prune_2d_txfm_mode;
	    return;
	  }

	  // Rows: pruning level 1, 2. Columns: mode evaluation, winner mode.
	  const int prune_by_level[2][2] = { { TX_TYPE_PRUNE_4, TX_TYPE_PRUNE_0 },
	                                     { TX_TYPE_PRUNE_5, TX_TYPE_PRUNE_2 } };
	  txfm_params->prune_2d_txfm_mode =
	      prune_by_level[winner_mode_tx_type_pruning - 1][is_winner_mode];
	}

	// Copies the transform-domain distortion flag and threshold from
	// |winner_mode_params| into |txfm_params|. The DEFAULT_EVAL entries are used
	// when |enable_winner_mode_for_tx_domain_dist| is 0; otherwise the
	// WINNER_MODE_EVAL or MODE_EVAL entries are used according to
	// |is_winner_mode|.
	// Both struct parameters are pointers: the body reads and writes through
	// them with "->".
	static INLINE void set_tx_domain_dist_params(
	    const WinnerModeParams *winner_mode_params, TxfmSearchParams *txfm_params,
	    int enable_winner_mode_for_tx_domain_dist, int is_winner_mode) {
	  int eval_stage = DEFAULT_EVAL;
	  if (enable_winner_mode_for_tx_domain_dist) {
	    eval_stage = is_winner_mode ? WINNER_MODE_EVAL : MODE_EVAL;
	  }

	  txfm_params->use_transform_domain_distortion =
	      winner_mode_params->use_transform_domain_distortion[eval_stage];
	  txfm_params->tx_domain_dist_threshold =
	      winner_mode_params->tx_domain_dist_threshold[eval_stage];
	}

	// This function sets mode parameters for different mode evaluation stages
	// Configures the transform-search parameters in |x| for the given evaluation
	// stage.
	//
	// cpi:            encoder instance; its common state, speed features,
	//                 winner-mode parameters and transform configuration are read
	//                 but not modified.
	// x:              macroblock whose txfm_search_params are overwritten. For
	//                 WINNER_MODE_EVAL the hash records in its txfm_search_info
	//                 are additionally reset.
	// mode_eval_type: DEFAULT_EVAL, MODE_EVAL or WINNER_MODE_EVAL. Any other value
	//                 trips the assert and leaves |x| untouched.
	static INLINE void set_mode_eval_params(const struct AV1_COMP *cpi,
	                                        MACROBLOCK *x,
	                                        MODE_EVAL_TYPE mode_eval_type) {
	  const AV1_COMMON *cm = &cpi->common;
	  const SPEED_FEATURES *sf = &cpi->sf;
	  const WinnerModeParams *winner_mode_params = &cpi->winner_mode_params;
	  TxfmSearchParams *txfm_params = &x->txfm_search_params;
	  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

	  switch (mode_eval_type) {
	    case DEFAULT_EVAL:
	      txfm_params->use_default_inter_tx_type = 0;
	      txfm_params->use_default_intra_tx_type = 0;
	      txfm_params->skip_txfm_level =
	          winner_mode_params->skip_txfm_level[DEFAULT_EVAL];
	      txfm_params->predict_dc_level =
	          winner_mode_params->predict_dc_level[DEFAULT_EVAL];
	      // Set default transform domain distortion type
	      set_tx_domain_dist_params(winner_mode_params, txfm_params, 0, 0);

	      // Get default threshold for R-D optimization of coefficients
	      txfm_params->coeff_opt_dist_threshold = get_rd_opt_coeff_thresh(
	          winner_mode_params->coeff_opt_dist_threshold, 0, 0);
	      txfm_params->coeff_opt_satd_threshold = get_rd_opt_coeff_thresh(
	          winner_mode_params->coeff_opt_satd_threshold, 0, 0);

	      // Set default transform size search method
	      set_tx_size_search_method(cm, winner_mode_params, txfm_params, 0, 0
	#if CONFIG_EXT_RECUR_PARTITIONS
	                                ,
	                                x, sf->tx_sf.use_largest_tx_size_for_small_bsize
	#endif  // CONFIG_EXT_RECUR_PARTITIONS
	      );
	      // Set default transform type prune
	      set_tx_type_prune(sf, txfm_params, 0, 0);
	      break;
	    case MODE_EVAL:
	      // Default-only transform types are used for intra when either the
	      // speed feature or the encoder configuration asks for it.
	      txfm_params->use_default_intra_tx_type =
	          (sf->tx_sf.tx_type_search.fast_intra_tx_type_search ||
	           cpi->oxcf.txfm_cfg.use_intra_default_tx_only);
	      txfm_params->use_default_inter_tx_type =
	          sf->tx_sf.tx_type_search.fast_inter_tx_type_search;
	      txfm_params->skip_txfm_level =
	          winner_mode_params->skip_txfm_level[MODE_EVAL];
	      txfm_params->predict_dc_level =
	          winner_mode_params->predict_dc_level[MODE_EVAL];
	      // Set transform domain distortion type for mode evaluation
	      set_tx_domain_dist_params(
	          winner_mode_params, txfm_params,
	          sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist, 0);

	      // Get threshold for R-D optimization of coefficients during mode
	      // evaluation
	      txfm_params->coeff_opt_dist_threshold = get_rd_opt_coeff_thresh(
	          winner_mode_params->coeff_opt_dist_threshold,
	          sf->winner_mode_sf.enable_winner_mode_for_coeff_opt, 0);
	      txfm_params->coeff_opt_satd_threshold = get_rd_opt_coeff_thresh(
	          winner_mode_params->coeff_opt_satd_threshold,
	          sf->winner_mode_sf.enable_winner_mode_for_coeff_opt, 0);

	      // Set the transform size search method for mode evaluation
	      set_tx_size_search_method(
	          cm, winner_mode_params, txfm_params,
	          sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch, 0
	#if CONFIG_EXT_RECUR_PARTITIONS
	          ,
	          x, sf->tx_sf.use_largest_tx_size_for_small_bsize
	#endif  // CONFIG_EXT_RECUR_PARTITIONS
	      );
	      // Set transform type prune for mode evaluation
	      set_tx_type_prune(sf, txfm_params,
	                        sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning,
	                        0);
	      break;
	    case WINNER_MODE_EVAL:
	      txfm_params->use_default_inter_tx_type = 0;
	      txfm_params->use_default_intra_tx_type = 0;
	      txfm_params->skip_txfm_level =
	          winner_mode_params->skip_txfm_level[WINNER_MODE_EVAL];
	      txfm_params->predict_dc_level =
	          winner_mode_params->predict_dc_level[WINNER_MODE_EVAL];

	      // Set transform domain distortion type for winner mode evaluation
	      set_tx_domain_dist_params(
	          winner_mode_params, txfm_params,
	          sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist, 1);

	      // Get threshold for R-D optimization of coefficients for winner mode
	      // evaluation
	      txfm_params->coeff_opt_dist_threshold = get_rd_opt_coeff_thresh(
	          winner_mode_params->coeff_opt_dist_threshold,
	          sf->winner_mode_sf.enable_winner_mode_for_coeff_opt, 1);
	      txfm_params->coeff_opt_satd_threshold = get_rd_opt_coeff_thresh(
	          winner_mode_params->coeff_opt_satd_threshold,
	          sf->winner_mode_sf.enable_winner_mode_for_coeff_opt, 1);

	      // Set the transform size search method for winner mode evaluation
	      set_tx_size_search_method(
	          cm, winner_mode_params, txfm_params,
	          sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch, 1
	#if CONFIG_EXT_RECUR_PARTITIONS
	          ,
	          x, sf->tx_sf.use_largest_tx_size_for_small_bsize
	#endif  // CONFIG_EXT_RECUR_PARTITIONS
	      );
	      // Set default transform type prune mode for winner mode evaluation
	      set_tx_type_prune(sf, txfm_params,
	                        sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning,
	                        1);

	      // Reset hash state for winner mode processing. Winner mode and
	      // subsequent transform/mode evaluations (palette/IntraBC) cannot reuse
	      // old data as the decisions would have been sub-optimal
	      // TODO(any): Move the evaluation of palette/IntraBC modes before winner
	      // mode is processed and clean-up the code below
	      reset_hash_records(txfm_info, sf->tx_sf.use_inter_txb_hash);
	      break;
	    default: assert(0); break;
	  }
	}

	// Similar to store_cfl_required(), but for use during the RDO process,
	// where we haven't yet determined whether this block uses CfL.
	//
	// Returns:
	//   CFL_DISALLOWED      when the sequence is monochrome;
	//   CFL_ALLOWED         when the block is not a chroma reference;
	//   is_cfl_allowed(xd)  otherwise.
	static INLINE CFL_ALLOWED_TYPE store_cfl_required_rdo(const AV1_COMMON *cm,
	                                                      const MACROBLOCK *x) {
	  const MACROBLOCKD *xd = &x->e_mbd;

	  // Only the monochrome check may short-circuit here. Folding
	  // !xd->is_chroma_ref into this guard would make the branch below
	  // unreachable and stop luma being stored for non-chroma-reference blocks.
	  if (cm->seq_params.monochrome) return CFL_DISALLOWED;

	  if (!xd->is_chroma_ref) {
	    // For non-chroma-reference blocks, we should always store the luma pixels,
	    // in case the corresponding chroma-reference block uses CfL.
	    // Note that this can only happen for block sizes which are <8 on
	    // their shortest side, as otherwise they would be chroma reference
	    // blocks.
	    return CFL_ALLOWED;
	  }

	  // For chroma reference blocks, we should store data in the encoder iff we're
	  // allowed to try out CfL.
	  return is_cfl_allowed(xd);
	}

	// Puts the UV-related fields of |mbmi| into their initial state: entry 1 of
	// the palette size array is cleared and uv_mode is set to UV_DC_PRED.
	// NOTE(review): index 1 is presumably the chroma palette entry -- confirm
	// against the PALETTE_MODE_INFO definition.
	static AOM_INLINE void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
	  mbmi->palette_mode_info.palette_size[1] = 0;
	  mbmi->uv_mode = UV_DC_PRED;
	}

	// Store best mode stats for winner mode processing
	static INLINE void store_winner_mode_stats(
	const AV1_COMMON const cm, MACROBLOCK x, const MB_MODE_INFO *mbmi,
	RD_STATS rd_cost, RD_STATS rd_cost_y, RD_STATS *rd_cost_uv,
	const MV_REFERENCE_FRAME refs, PREDICTION_MODE mode, uint8_t color_map,
	BLOCK_SIZE bsize, int64_t this_rd, int multi_winner_mode_type,
	int txfm_search_done) {
	WinnerModeStats *winner_mode_stats = x->winner_mode_stats;
	int mode_idx = 0;
	int is_palette_mode = mbmi->palette_mode_info.palette_size[PLANE_TYPE_Y] > 0;
	// Mode stat is not required when multiwinner mode processing is disabled
	if (multi_winner_mode_type == MULTI_WINNER_MODE_OFF) return;
	// Ignore mode with maximum rd
	if (this_rd == INT64_MAX) return;
	// TODO(any): Winner mode processing is currently not applicable for palette
	// mode in Inter frames. Clean-up the following code, once support is added
	if (!frame_is_intra_only(cm) && is_palette_mode) return;

	int max_winner_mode_count = frame_is_intra_only(cm)
	? MAX_WINNER_MODE_COUNT_INTRA
	: MAX_WINNER_MODE_COUNT_INTER;
	max_winner_mode_count = (multi_winner_mode_type == MULTI_WINNER_MODE_FAST)
	? AOMMIN(max_winner_mode_count, 2)
	: max_winner_mode_count;
	assert(x->winner_mode_count >= 0 &&
	x->winner_mode_count <= max_winner_mode_count);

	if (x->winner_mode_count) {
	// Find the mode which has higher rd cost than this_rd
	for (mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++)
	if (winner_mode_stats[mode_idx].rd > this_rd) break;

	if (mode_idx == max_winner_mode_count) {
	// No mode has higher rd cost than this_rd
	return;
	} else if (mode_idx < max_winner_mode_count - 1) {
	// Create a slot for current mode and move others to the next slot
	memmove(
	&winner_mode_stats[mode_idx + 1], &winner_mode_stats[mode_idx],
	(max_winner_mode_count - mode_idx - 1) * sizeof(*winner_mode_stats));
	}
	}
	// Add a mode stat for winner mode processing
	winner_mode_stats[mode_idx].mbmi = *mbmi;
	winner_mode_stats[mode_idx].rd = this_rd;
	winner_mode_stats[mode_idx].mode = mode;
	winner_mode_stats[mode_idx].refs[0] = refs[0];
	winner_mode_stats[mode_idx].refs[1] = refs[1];

	// Update rd stats required for inter frame
	if (!frame_is_intra_only(cm) && rd_cost && rd_cost_y && rd_cost_uv) {
	const MACROBLOCKD *xd = &x->e_mbd;
	const int skip_ctx = av1_get_skip_txfm_context(xd);
	const int is_intra_mode = mode < INTRA_MODE_END;
	const int skip_txfm =
	mbmi->skip_txfm[xd->tree_type == CHROMA_PART] && !is_intra_mode;

	winner_mode_stats[mode_idx].rd_cost = *rd_cost;
	if (txfm_search_done) {
	winner_mode_stats[mode_idx].rate_y =
	rd_cost_y->rate +
	x->mode_costs
	.skip_txfm_cost[skip_ctx][rd_cost->skip_txfm \|\| skip_txfm];
	winner_mode_stats[mode_idx].rate_uv = rd_cost_uv->rate;
	}
	}

	if (color_map) {
	// Store color_index_map for palette mode
	const MACROBLOCKD *const xd = &x->e_mbd;
	int block_width, block_height;
	av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width,
	&block_height, NULL, NULL);
	memcpy(winner_mode_stats[mode_idx].color_index_map, color_map,
	block_width * block_height * sizeof(color_map[0]));
	}

	x->winner_mode_count =
	AOMMIN(x->winner_mode_count + 1, max_winner_mode_count);
	}

	// Declaration only; no definition is visible in this part of the file.
	// NOTE(review): judging by the name and parameter list, this presumably
	// returns a per-pixel variance for the luma block in |ref| of size |bs| at
	// bit depth |bd| -- confirm against the definition before relying on it.
	unsigned int av1_high_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
	const struct buf_2d *ref,
	BLOCK_SIZE bs, int bd);

	#ifdef __cplusplus
	} // extern "C"
	#endif

	#endif // AOM_AV1_ENCODER_RDOPT_UTILS_H_