Skip trellis opt based on SATD
Introduce a speed feature, 'perform_coeff_opt_based_on_satd',
to skip trellis optimization of coefficients based on the sum of
absolute transformed differences (SATD). This speed feature is
enabled for cpu-used 5.
            Encode Time            BD-Rate Loss
cpu-used    Reduction     avg.psnr   ovr.psnr   ssim
   5        1.728%        -0.0182%   -0.0148%   -0.0226%
STATS_CHANGED
Change-Id: I2affee4d9d678e1baf50081d07c5afef88b7111d
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 053619d..8e5fb6f 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -294,10 +294,11 @@
// Try to prune 2d transforms based on 1d transform results.
int prune_2d_txfm_mode;
- // The following four parameters are copied from WinnerModeParams based on the
+ // The following six parameters are copied from WinnerModeParams based on the
// current evaluation mode. See the documentation for WinnerModeParams for
// more detail.
unsigned int coeff_opt_dist_threshold;
+ unsigned int coeff_opt_satd_threshold;
unsigned int tx_domain_dist_threshold;
TX_SIZE_SEARCH_METHOD tx_size_search_method;
unsigned int use_transform_domain_distortion;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 2c85615..fe774ef 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -263,6 +263,25 @@
void av1_xform_quant(MACROBLOCK *x, int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize, TxfmParam *txfm_param,
QUANT_PARAM *qparam) {
+ av1_xform(x, plane, block, blk_row, blk_col, plane_bsize, txfm_param);
+ av1_quant(x, plane, block, txfm_param, qparam);
+}
+
+void av1_xform(MACROBLOCK *x, int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TxfmParam *txfm_param) {
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const int block_offset = BLOCK_OFFSET(block);
+ tran_low_t *const coeff = p->coeff + block_offset;
+ const int diff_stride = block_size_wide[plane_bsize];
+
+ const int src_offset = (blk_row * diff_stride + blk_col);
+ const int16_t *src_diff = &p->src_diff[src_offset << MI_SIZE_LOG2];
+
+ av1_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
+}
+
+void av1_quant(MACROBLOCK *x, int plane, int block, TxfmParam *txfm_param,
+ QUANT_PARAM *qparam) {
const struct macroblock_plane *const p = &x->plane[plane];
const SCAN_ORDER *const scan_order =
get_scan(txfm_param->tx_size, txfm_param->tx_type);
@@ -271,12 +290,6 @@
tran_low_t *const qcoeff = p->qcoeff + block_offset;
tran_low_t *const dqcoeff = p->dqcoeff + block_offset;
uint16_t *const eob = &p->eobs[block];
- const int diff_stride = block_size_wide[plane_bsize];
-
- const int src_offset = (blk_row * diff_stride + blk_col);
- const int16_t *src_diff = &p->src_diff[src_offset << MI_SIZE_LOG2];
-
- av1_fwd_txfm(src_diff, coeff, diff_stride, txfm_param);
if (qparam->xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
const int n_coeffs = av1_get_max_eob(txfm_param->tx_size);
@@ -300,7 +313,6 @@
p->txb_entropy_ctx[block] =
(uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
}
- return;
}
void av1_setup_xform(const AV1_COMMON *cm, MACROBLOCK *x, TX_SIZE tx_size,
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index a337c83..4160f82 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -76,6 +76,12 @@
int blk_col, BLOCK_SIZE plane_bsize, TxfmParam *txfm_param,
QUANT_PARAM *qparam);
+void av1_xform(MACROBLOCK *x, int plane, int block, int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize, TxfmParam *txfm_param);
+
+void av1_quant(MACROBLOCK *x, int plane, int block, TxfmParam *txfm_param,
+ QUANT_PARAM *qparam);
+
int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
int block, TX_SIZE tx_size, TX_TYPE tx_type,
const TXB_CTX *const txb_ctx, int fast_mode, int *rate_cost);
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 1e1b7a9..db6b2f7 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -1094,6 +1094,11 @@
// Corresponds to enable_winner_mode_for_coeff_opt speed feature.
unsigned int coeff_opt_dist_threshold[MODE_EVAL_TYPES];
+ // Threshold to determine if trellis optimization is to be enabled based on
+ // SATD. Corresponds to the perform_coeff_opt_based_on_satd and
+ // enable_winner_mode_for_coeff_opt speed features.
+ unsigned int coeff_opt_satd_threshold[MODE_EVAL_TYPES];
+
// Determines the tx size search method during rdopt.
// Corresponds to enable_winner_mode_for_tx_size_srch speed feature.
TX_SIZE_SEARCH_METHOD tx_size_search_methods[MODE_EVAL_TYPES];
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index 532c08c..fbed908 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -481,6 +481,9 @@
// Get default threshold for R-D optimization of coefficients
txfm_params->coeff_opt_dist_threshold = get_rd_opt_coeff_thresh(
winner_mode_params->coeff_opt_dist_threshold, 0, 0);
+ txfm_params->coeff_opt_satd_threshold = get_rd_opt_coeff_thresh(
+ winner_mode_params->coeff_opt_satd_threshold, 0, 0);
+
// Set default transform size search method
set_tx_size_search_method(cm, winner_mode_params, txfm_params, 0, 0);
// Set default transform type prune
@@ -505,6 +508,10 @@
txfm_params->coeff_opt_dist_threshold = get_rd_opt_coeff_thresh(
winner_mode_params->coeff_opt_dist_threshold,
sf->winner_mode_sf.enable_winner_mode_for_coeff_opt, 0);
+ txfm_params->coeff_opt_satd_threshold = get_rd_opt_coeff_thresh(
+ winner_mode_params->coeff_opt_satd_threshold,
+ sf->winner_mode_sf.enable_winner_mode_for_coeff_opt, 0);
+
// Set the transform size search method for mode evaluation
set_tx_size_search_method(
cm, winner_mode_params, txfm_params,
@@ -530,6 +537,10 @@
txfm_params->coeff_opt_dist_threshold = get_rd_opt_coeff_thresh(
winner_mode_params->coeff_opt_dist_threshold,
sf->winner_mode_sf.enable_winner_mode_for_coeff_opt, 1);
+ txfm_params->coeff_opt_satd_threshold = get_rd_opt_coeff_thresh(
+ winner_mode_params->coeff_opt_satd_threshold,
+ sf->winner_mode_sf.enable_winner_mode_for_coeff_opt, 1);
+
// Set the transform size search method for winner mode evaluation
set_tx_size_search_method(
cm, winner_mode_params, txfm_params,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index c9cd96c..0495f4c 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -84,6 +84,12 @@
{ 216, 0, UINT_MAX }
};
+static unsigned int coeff_opt_satd_thresholds[3][MODE_EVAL_TYPES] = {
+ { UINT_MAX, UINT_MAX, UINT_MAX },
+ { 97, 16, UINT_MAX },
+ { 25, 10, UINT_MAX },
+};
+
// Transform size to be used for default, mode and winner mode evaluation
// Index 0: Default mode evaluation, Winner mode processing is not applicable
// (Eg : IntraBc) Index 1: Mode evaluation. Index 2: Winner mode evaluation.
@@ -602,6 +608,9 @@
sf->mv_sf.reduce_search_range = 1;
sf->tpl_sf.prune_starting_mv = 3;
+
+ sf->rd_sf.perform_coeff_opt_based_on_satd =
+ is_boosted_arf2_bwd_type ? 1 : 2;
}
if (speed >= 6) {
@@ -1103,6 +1112,7 @@
rd_sf->tx_domain_dist_thres_level = 0;
rd_sf->use_hash_based_trellis = 0;
rd_sf->perform_coeff_opt = 0;
+ rd_sf->perform_coeff_opt_based_on_satd = 0;
}
static AOM_INLINE void init_winner_mode_sf(
@@ -1262,6 +1272,14 @@
coeff_opt_dist_thresholds[cpi->sf.rd_sf.perform_coeff_opt],
sizeof(winner_mode_params->coeff_opt_dist_threshold));
+ // assert ensures that coeff_opt_satd_thresholds is accessed correctly
+ assert(cpi->sf.rd_sf.perform_coeff_opt_based_on_satd >= 0 &&
+ cpi->sf.rd_sf.perform_coeff_opt_based_on_satd < 3);
+ memcpy(
+ winner_mode_params->coeff_opt_satd_threshold,
+ coeff_opt_satd_thresholds[cpi->sf.rd_sf.perform_coeff_opt_based_on_satd],
+ sizeof(winner_mode_params->coeff_opt_satd_threshold));
+
// assert ensures that predict_skip_levels is accessed correctly
assert(cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction >= 0 &&
cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction < 3);
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 768d634..ad470cf 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -821,6 +821,11 @@
// Flag used to control the extent of coeff R-D optimization
int perform_coeff_opt;
+
+ // Controls skipping of coeff R-D optimization based on SATD values.
+ // 0 : Do not disable coeff R-D opt.
+ // 1, 2 : Disable coeff R-D opt with progressively increasing aggressiveness.
+ int perform_coeff_opt_based_on_satd;
} RD_CALC_SPEED_FEATURES;
typedef struct WINNER_MODE_SPEED_FEATURES {
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 97d1a5f..663e272 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -74,6 +74,12 @@
TX_8X8, TX_8X8, TX_16X16, TX_16X16,
};
+// look-up table for sqrt of number of pixels in a transform block
+// rounded up to the nearest integer.
+static const int sqrt_tx_pixels_2d[TX_SIZES_ALL] = { 4, 8, 16, 32, 32, 6, 6,
+ 12, 12, 23, 23, 32, 32, 8,
+ 8, 16, 16, 23, 23 };
+
static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record,
const uint32_t hash) {
// Linear search through the circular buffer to find matching hash.
@@ -2088,6 +2094,37 @@
return cost;
}
+static int skip_trellis_opt_based_on_satd(MACROBLOCK *x,
+ QUANT_PARAM *quant_param, int plane,
+ int block, TX_SIZE tx_size,
+ int quant_b_adapt, int qstep,
+ unsigned int coeff_opt_satd_threshold,
+ int skip_trellis) {
+ if (skip_trellis || (coeff_opt_satd_threshold == UINT_MAX))
+ return skip_trellis;
+
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const int block_offset = BLOCK_OFFSET(block);
+ tran_low_t *const coeff_ptr = p->coeff + block_offset;
+ const int n_coeffs = av1_get_max_eob(tx_size);
+ const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size));
+ int satd = aom_satd(coeff_ptr, n_coeffs);
+ satd = RIGHT_SIGNED_SHIFT(satd, shift);
+
+ const int skip_block_trellis =
+ ((uint64_t)satd >
+ (uint64_t)coeff_opt_satd_threshold * qstep * sqrt_tx_pixels_2d[tx_size]);
+
+ av1_setup_quant(
+ tx_size, !skip_block_trellis,
+ skip_block_trellis
+ ? (USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP)
+ : AV1_XFORM_QUANT_FP,
+ quant_b_adapt, quant_param);
+
+ return skip_block_trellis;
+}
+
// Search for the best transform type for a given transform block.
// This function can be used for both inter and intra, both luma and chroma.
static void search_tx_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
@@ -2215,6 +2252,7 @@
TxfmParam txfm_param;
QUANT_PARAM quant_param;
+ int skip_trellis_based_on_satd[TX_TYPES] = { 0 };
av1_setup_xform(cm, x, tx_size, DCT_DCT, &txfm_param);
av1_setup_quant(tx_size, !skip_trellis,
skip_trellis ? (USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B
@@ -2235,8 +2273,13 @@
RD_STATS this_rd_stats;
av1_invalid_rd_stats(&this_rd_stats);
- av1_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, &txfm_param,
- &quant_param);
+ av1_xform(x, plane, block, blk_row, blk_col, plane_bsize, &txfm_param);
+
+ skip_trellis_based_on_satd[tx_type] = skip_trellis_opt_based_on_satd(
+ x, &quant_param, plane, block, tx_size, cpi->oxcf.quant_b_adapt, qstep,
+ txfm_params->coeff_opt_satd_threshold, skip_trellis);
+
+ av1_quant(x, plane, block, &txfm_param, &quant_param);
// Calculate rate cost of quantized coefficients.
if (quant_param.use_optimize_b) {
@@ -2386,6 +2429,7 @@
if (plane == 0) update_txk_array(xd, blk_row, blk_col, tx_size, best_tx_type);
x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx;
x->plane[plane].eobs[block] = best_eob;
+ skip_trellis = skip_trellis_based_on_satd[best_tx_type];
// Point dqcoeff to the quantized coefficients corresponding to the best
// transform type, then we can skip transform and quantization, e.g. in the
diff --git a/test/horz_superres_test.cc b/test/horz_superres_test.cc
index 836d648..09f6f47 100644
--- a/test/horz_superres_test.cc
+++ b/test/horz_superres_test.cc
@@ -54,7 +54,7 @@
{ "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0, 25.5 },
#if CONFIG_AV1_HIGHBITDEPTH
{ "park_joy_90p_10_444.y4m", AOM_IMG_FMT_I44416, AOM_BITS_10, 1, 5, 0,
- 27.97 },
+ 27.84 },
#endif
{ "screendata.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 4, 1, 20.0 },
// Image coding (single frame).