Support 4:1 partition blocks in predict_skip_flag()
Tested on lowres for 30 frames: no compression loss, and a few percent
improvement in encoding speed.
Change-Id: I11b230c0082a669a494c6a9b26a291fd07ac37a4
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0a0c4c2..cbbc4db 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -4663,25 +4663,55 @@
return 1;
}
+static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = {
+ {
+ 0, 0, 0, 50, 50, 50, 55, 47, 47, 53, 53, 53, 0, 0, 0, 0,
+#if CONFIG_EXT_PARTITION
+ 0, 0, 0,
+#endif
+ 50, 50, 55, 55, 53, 53,
+#if CONFIG_EXT_PARTITION
+ 0, 0,
+#endif
+ },
+ {
+ 0, 0, 0, 69, 69, 69, 67, 68, 68, 53, 53, 53, 0, 0, 0, 0,
+#if CONFIG_EXT_PARTITION
+ 0, 0, 0,
+#endif
+ 69, 69, 67, 67, 53, 53,
+#if CONFIG_EXT_PARTITION
+ 0, 0,
+#endif
+ },
+ {
+ 0, 0, 0, 70, 73, 73, 70, 73, 73, 58, 58, 58, 0, 0, 0, 0,
+#if CONFIG_EXT_PARTITION
+ 0, 0, 0,
+#endif
+ 70, 70, 70, 70, 58, 58,
+#if CONFIG_EXT_PARTITION
+ 0, 0,
+#endif
+ }
+};
+
// Uses simple features on top of DCT coefficients to quickly predict
// whether optimal RD decision is to skip encoding the residual.
static int predict_skip_flag(const MACROBLOCK *x, BLOCK_SIZE bsize) {
- if (bsize > BLOCK_16X16) return 0;
- // Tuned for target false-positive rate of 5% for all block sizes:
- const uint32_t threshold_table[3][BLOCK_16X16 - BLOCK_4X4 + 1] = {
- { 50, 50, 50, 55, 47, 47, 53 },
- { 69, 69, 69, 67, 68, 68, 53 },
- { 70, 73, 73, 70, 73, 73, 58 }
- };
- const struct macroblock_plane *const p = &x->plane[0];
+ const int max_tx_size =
+ get_max_rect_tx_size(bsize, is_inter_block(&x->e_mbd.mi[0]->mbmi));
+ const int tx_h = tx_size_high[max_tx_size];
+ const int tx_w = tx_size_wide[max_tx_size];
+ if (tx_h > 16 || tx_w > 16) return 0;
+
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
const MACROBLOCKD *xd = &x->e_mbd;
DECLARE_ALIGNED(32, tran_low_t, DCT_coefs[32 * 32]);
TxfmParam param;
param.tx_type = DCT_DCT;
- param.tx_size =
- get_max_rect_tx_size(bsize, is_inter_block(&x->e_mbd.mi[0]->mbmi));
+ param.tx_size = max_tx_size;
param.bd = xd->bd;
param.is_hbd = get_bitdepth_data_path_index(xd);
param.lossless = 0;
@@ -4693,29 +4723,33 @@
// within this function.
param.tx_set_type = get_ext_tx_set_type(param.tx_size, plane_bsize,
is_inter_block(&xd->mi[0]->mbmi), 0);
-
+ const uint32_t dc = (uint32_t)av1_dc_quant_QTX(x->qindex, 0, xd->bd);
+ const uint32_t ac = (uint32_t)av1_ac_quant_QTX(x->qindex, 0, xd->bd);
+ uint32_t max_quantized_coef = 0;
+ const int16_t *src_diff = x->plane[0].src_diff;
+ for (int row = 0; row < bh; row += tx_h) {
+ for (int col = 0; col < bw; col += tx_w) {
#if CONFIG_TXMG
- av1_highbd_fwd_txfm(p->src_diff, DCT_coefs, bw, &param);
+ av1_highbd_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
#else // CONFIG_TXMG
- if (param.is_hbd)
- av1_highbd_fwd_txfm(p->src_diff, DCT_coefs, bw, &param);
- else
- av1_fwd_txfm(p->src_diff, DCT_coefs, bw, &param);
+ if (param.is_hbd)
+ av1_highbd_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
+ else
+ av1_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
#endif // CONFIG_TXMG
- // Operating on TX domain, not pixels; we want the QTX quantizers
- uint32_t dc = (uint32_t)av1_dc_quant_QTX(x->qindex, 0, xd->bd);
- uint32_t ac = (uint32_t)av1_ac_quant_QTX(x->qindex, 0, xd->bd);
- uint32_t max_quantized_coef = (100 * (uint32_t)abs(DCT_coefs[0])) / dc;
- for (int i = 1; i < bw * bh; i++) {
- uint32_t cur_quantized_coef = (100 * (uint32_t)abs(DCT_coefs[i])) / ac;
- if (cur_quantized_coef > max_quantized_coef)
- max_quantized_coef = cur_quantized_coef;
+ // Operating on TX domain, not pixels; we want the QTX quantizers
+ for (int i = 0; i < tx_w * tx_h; ++i) {
+ uint32_t cur_quantized_coef =
+ (100 * (uint32_t)abs(DCT_coefs[i])) / (i ? ac : dc);
+ if (cur_quantized_coef > max_quantized_coef)
+ max_quantized_coef = cur_quantized_coef;
+ }
+ }
+ src_diff += tx_h * bw;
}
-
const int bd_idx = (xd->bd == 8) ? 0 : ((xd->bd == 10) ? 1 : 2);
- return max_quantized_coef <
- threshold_table[bd_idx][AOMMAX(bsize - BLOCK_4X4, 0)];
+ return max_quantized_coef < skip_pred_threshold[bd_idx][bsize];
}
// Used to set proper context for early termination with skip = 1.