Add adapt_scan experiment

Performance improvement (BD-rate):
  lowres 0.921%
  midres 0.730%
  hdres  1.019%

Change-Id: I26208d6c0531937bff44de505b4ea355c7852802
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 7179e3d..885b782 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -12,6 +12,7 @@
#include "av1/common/entropy.h"
#include "av1/common/blockd.h"
#include "av1/common/onyxc_int.h"
+#include "av1/common/scan.h"
#include "av1/common/entropymode.h"
#include "aom_mem/aom_mem.h"
#include "aom/aom_integer.h"
@@ -2842,6 +2843,10 @@
#endif // CONFIG_RANS
}
+#if CONFIG_ADAPT_SCAN
+#define ADAPT_SCAN_UPDATE_RATE_16 (1 << 13)
+#endif // CONFIG_ADAPT_SCAN; NOTE(review): this patch adds the same macro to entropy.h too - one copy looks redundant
+
static void adapt_coef_probs(AV1_COMMON *cm, TX_SIZE tx_size,
unsigned int count_sat,
unsigned int update_factor) {
@@ -2881,9 +2886,13 @@
}
void av1_adapt_coef_probs(AV1_COMMON *cm) {
- TX_SIZE t;
+ TX_SIZE tx_size;
unsigned int count_sat, update_factor;
+#if CONFIG_ADAPT_SCAN
+ TX_TYPE tx_type;
+#endif
+
#if CONFIG_ENTROPY
if (cm->last_frame_type == KEY_FRAME) {
update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS; /* adapt quickly */
@@ -2904,11 +2913,19 @@
count_sat = COEF_COUNT_SAT;
}
#endif // CONFIG_ENTROPY
- for (t = TX_4X4; t <= TX_32X32; t++)
- adapt_coef_probs(cm, t, count_sat, update_factor);
+ for (tx_size = TX_4X4; tx_size <= TX_32X32; tx_size++)
+ adapt_coef_probs(cm, tx_size, count_sat, update_factor);
#if CONFIG_RANS
av1_coef_pareto_cdfs(cm->fc);
#endif // CONFIG_RANS
+
+#if CONFIG_ADAPT_SCAN
+ for (tx_size = TX_4X4; tx_size < TX_SIZES; ++tx_size) // per (tx_size, tx_type): update scan prob, then scan order
+ for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { // start at the first TX_TYPE, not a TX_SIZE value
+ av1_update_scan_prob(cm, tx_size, tx_type, ADAPT_SCAN_UPDATE_RATE_16);
+ av1_update_scan_order_facade(cm, tx_size, tx_type);
+ }
+#endif // CONFIG_ADAPT_SCAN
}
#if CONFIG_ENTROPY
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index 15b50db..28556fc 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -288,6 +288,10 @@
#endif // CONFIG_ENTROPY
+#if CONFIG_ADAPT_SCAN
+#define ADAPT_SCAN_UPDATE_RATE_16 (1 << 13)
+#endif
+
static INLINE aom_prob av1_merge_probs(aom_prob pre_prob,
const unsigned int ct[2],
unsigned int count_sat,
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index e812f15..e25dcf8 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -12,6 +12,7 @@
#include "aom_mem/aom_mem.h"
#include "av1/common/reconinter.h"
+#include "av1/common/scan.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/seg_common.h"
@@ -1755,6 +1756,9 @@
av1_default_coef_probs(cm);
init_mode_probs(cm->fc);
av1_init_mv_probs(cm);
+#if CONFIG_ADAPT_SCAN
+ av1_init_scan_order(cm);
+#endif
cm->fc->initialized = 1;
if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
diff --git a/av1/common/idct.c b/av1/common/idct.c
index eedbc79..4f33f9b 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -837,8 +837,10 @@
if (eob == 1)
// DC only DCT coefficient
aom_idct8x8_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
else if (eob <= 12)
aom_idct8x8_12_add(input, dest, stride);
+#endif
else
aom_idct8x8_64_add(input, dest, stride);
}
@@ -849,19 +851,22 @@
* coefficients. Use eobs to separate different cases. */
if (eob == 1) /* DC only DCT coefficient. */
aom_idct16x16_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
else if (eob <= 10)
aom_idct16x16_10_add(input, dest, stride);
+#endif
else
aom_idct16x16_256_add(input, dest, stride);
}
void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
- if (eob == 1)
- aom_idct32x32_1_add(input, dest, stride);
+ if (eob == 1) aom_idct32x32_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
else if (eob <= 34)
// non-zero coeff only in upper-left 8x8
aom_idct32x32_34_add(input, dest, stride);
+#endif
else
aom_idct32x32_1024_add(input, dest, stride);
}
@@ -1659,13 +1664,13 @@
// TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
// Combine that with code here.
// DC only DCT coefficient
- if (eob == 1) {
- aom_highbd_idct8x8_1_add(input, dest, stride, bd);
- } else if (eob <= 10) {
+ if (eob == 1) aom_highbd_idct8x8_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+ else if (eob <= 10)
aom_highbd_idct8x8_10_add(input, dest, stride, bd);
- } else {
+#endif
+ else
aom_highbd_idct8x8_64_add(input, dest, stride, bd);
- }
}
void av1_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
@@ -1673,25 +1678,25 @@
// The calculation can be simplified if there are not many non-zero dct
// coefficients. Use eobs to separate different cases.
// DC only DCT coefficient.
- if (eob == 1) {
- aom_highbd_idct16x16_1_add(input, dest, stride, bd);
- } else if (eob <= 10) {
+ if (eob == 1) aom_highbd_idct16x16_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+ else if (eob <= 10)
aom_highbd_idct16x16_10_add(input, dest, stride, bd);
- } else {
+#endif
+ else
aom_highbd_idct16x16_256_add(input, dest, stride, bd);
- }
}
void av1_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd) {
// Non-zero coeff only in upper-left 8x8
- if (eob == 1) {
- aom_highbd_idct32x32_1_add(input, dest, stride, bd);
- } else if (eob <= 34) {
+ if (eob == 1) aom_highbd_idct32x32_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+ else if (eob <= 34)
aom_highbd_idct32x32_34_add(input, dest, stride, bd);
- } else {
+#endif
+ else
aom_highbd_idct32x32_1024_add(input, dest, stride, bd);
- }
}
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
diff --git a/av1/common/scan.h b/av1/common/scan.h
index 407c9ec..af39993 100644
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@@ -82,6 +82,10 @@
static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
TX_TYPE tx_type, int is_inter) {
+#if CONFIG_ADAPT_SCAN
+ (void)is_inter;
+ return &cm->fc->sc[tx_size][tx_type];
+#else // CONFIG_ADAPT_SCAN
(void)cm;
#if CONFIG_EXT_TX
return is_inter ? &av1_inter_scan_orders[tx_size][tx_type]
@@ -90,6 +94,7 @@
(void)is_inter;
return &av1_intra_scan_orders[tx_size][tx_type];
#endif // CONFIG_EXT_TX
+#endif // CONFIG_ADAPT_SCAN
}
#ifdef __cplusplus
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index e12b9da..bd34d4d 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -303,6 +303,9 @@
const int eob =
av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size,
tx_type, &max_scan_line, r, mbmi->segment_id);
+#if CONFIG_ADAPT_SCAN
+ av1_update_scan_count_facade(cm, tx_size, tx_type, pd->dqcoeff, eob);
+#endif
if (eob)
inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
max_scan_line, eob);
@@ -385,6 +388,9 @@
const int eob =
av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size, tx_type,
&max_scan_line, r, segment_id);
+#if CONFIG_ADAPT_SCAN
+ av1_update_scan_count_facade(cm, tx_size, tx_type, pd->dqcoeff, eob);
+#endif
if (eob)
inverse_transform_block(xd, plane, tx_type, tx_size,
&pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 466cb9c..61a4890 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -535,6 +535,9 @@
}
av1_init_mv_probs(cm);
+#if CONFIG_ADAPT_SCAN
+ av1_init_scan_order(cm);
+#endif
av1_initialize_rd_consts(cpi);
// Tiling is ignored in the first pass.
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 7707e8f..821adba 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -534,6 +534,14 @@
*tp = t;
+#if CONFIG_ADAPT_SCAN
+ // Since dqcoeff is not available here, we pass qcoeff into
+ // av1_update_scan_count_facade(). The update behavior should be the same
+ // because av1_update_scan_count_facade() only cares if coefficients are zero
+ // or not.
+ av1_update_scan_count_facade((AV1_COMMON *)cm, tx_size, tx_type, qcoeff, c);
+#endif
+
av1_set_contexts(xd, pd, tx_size, c > 0, blk_col, blk_row);
}