adapt_scan experiment

Performance improvement
        BDRate
lowres  0.921%
midres  0.730%
hdres   1.019%

Change-Id: I26208d6c0531937bff44de505b4ea355c7852802
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 7179e3d..885b782 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -12,6 +12,7 @@
 #include "av1/common/entropy.h"
 #include "av1/common/blockd.h"
 #include "av1/common/onyxc_int.h"
+#include "av1/common/scan.h"
 #include "av1/common/entropymode.h"
 #include "aom_mem/aom_mem.h"
 #include "aom/aom_integer.h"
@@ -2842,6 +2843,10 @@
 #endif  // CONFIG_RANS
 }
 
+#if CONFIG_ADAPT_SCAN
+#define ADAPT_SCAN_UPDATE_RATE_16 (1 << 13)
+#endif
+
 static void adapt_coef_probs(AV1_COMMON *cm, TX_SIZE tx_size,
                              unsigned int count_sat,
                              unsigned int update_factor) {
@@ -2881,9 +2886,13 @@
 }
 
 void av1_adapt_coef_probs(AV1_COMMON *cm) {
-  TX_SIZE t;
+  TX_SIZE tx_size;
   unsigned int count_sat, update_factor;
 
+#if CONFIG_ADAPT_SCAN
+  TX_TYPE tx_type;
+#endif
+
 #if CONFIG_ENTROPY
   if (cm->last_frame_type == KEY_FRAME) {
     update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS; /* adapt quickly */
@@ -2904,11 +2913,19 @@
     count_sat = COEF_COUNT_SAT;
   }
 #endif  // CONFIG_ENTROPY
-  for (t = TX_4X4; t <= TX_32X32; t++)
-    adapt_coef_probs(cm, t, count_sat, update_factor);
+  for (tx_size = TX_4X4; tx_size <= TX_32X32; tx_size++)
+    adapt_coef_probs(cm, tx_size, count_sat, update_factor);
 #if CONFIG_RANS
   av1_coef_pareto_cdfs(cm->fc);
 #endif  // CONFIG_RANS
+
+#if CONFIG_ADAPT_SCAN
+  for (tx_size = TX_4X4; tx_size < TX_SIZES; ++tx_size)
+    for (tx_type = TX_4X4; tx_type < TX_TYPES; ++tx_type) {
+      av1_update_scan_prob(cm, tx_size, tx_type, ADAPT_SCAN_UPDATE_RATE_16);
+      av1_update_scan_order_facade(cm, tx_size, tx_type);
+    }
+#endif
 }
 
 #if CONFIG_ENTROPY
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index 15b50db..28556fc 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -288,6 +288,10 @@
 
 #endif  // CONFIG_ENTROPY
 
+#if CONFIG_ADAPT_SCAN
+#define ADAPT_SCAN_UPDATE_RATE_16 (1 << 13)
+#endif
+
 static INLINE aom_prob av1_merge_probs(aom_prob pre_prob,
                                        const unsigned int ct[2],
                                        unsigned int count_sat,
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index e812f15..e25dcf8 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -12,6 +12,7 @@
 #include "aom_mem/aom_mem.h"
 
 #include "av1/common/reconinter.h"
+#include "av1/common/scan.h"
 #include "av1/common/onyxc_int.h"
 #include "av1/common/seg_common.h"
 
@@ -1755,6 +1756,9 @@
   av1_default_coef_probs(cm);
   init_mode_probs(cm->fc);
   av1_init_mv_probs(cm);
+#if CONFIG_ADAPT_SCAN
+  av1_init_scan_order(cm);
+#endif
   cm->fc->initialized = 1;
 
   if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
diff --git a/av1/common/idct.c b/av1/common/idct.c
index eedbc79..4f33f9b 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -837,8 +837,10 @@
   if (eob == 1)
     // DC only DCT coefficient
     aom_idct8x8_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
   else if (eob <= 12)
     aom_idct8x8_12_add(input, dest, stride);
+#endif
   else
     aom_idct8x8_64_add(input, dest, stride);
 }
@@ -849,19 +851,22 @@
    * coefficients. Use eobs to separate different cases. */
   if (eob == 1) /* DC only DCT coefficient. */
     aom_idct16x16_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
   else if (eob <= 10)
     aom_idct16x16_10_add(input, dest, stride);
+#endif
   else
     aom_idct16x16_256_add(input, dest, stride);
 }
 
 void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                        int eob) {
-  if (eob == 1)
-    aom_idct32x32_1_add(input, dest, stride);
+  if (eob == 1) aom_idct32x32_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
   else if (eob <= 34)
     // non-zero coeff only in upper-left 8x8
     aom_idct32x32_34_add(input, dest, stride);
+#endif
   else
     aom_idct32x32_1024_add(input, dest, stride);
 }
@@ -1659,13 +1664,13 @@
   // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
   // Combine that with code here.
   // DC only DCT coefficient
-  if (eob == 1) {
-    aom_highbd_idct8x8_1_add(input, dest, stride, bd);
-  } else if (eob <= 10) {
+  if (eob == 1) aom_highbd_idct8x8_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 10)
     aom_highbd_idct8x8_10_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
     aom_highbd_idct8x8_64_add(input, dest, stride, bd);
-  }
 }
 
 void av1_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
@@ -1673,25 +1678,25 @@
   // The calculation can be simplified if there are not many non-zero dct
   // coefficients. Use eobs to separate different cases.
   // DC only DCT coefficient.
-  if (eob == 1) {
-    aom_highbd_idct16x16_1_add(input, dest, stride, bd);
-  } else if (eob <= 10) {
+  if (eob == 1) aom_highbd_idct16x16_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 10)
     aom_highbd_idct16x16_10_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
     aom_highbd_idct16x16_256_add(input, dest, stride, bd);
-  }
 }
 
 void av1_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, int bd) {
   // Non-zero coeff only in upper-left 8x8
-  if (eob == 1) {
-    aom_highbd_idct32x32_1_add(input, dest, stride, bd);
-  } else if (eob <= 34) {
+  if (eob == 1) aom_highbd_idct32x32_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 34)
     aom_highbd_idct32x32_34_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
     aom_highbd_idct32x32_1024_add(input, dest, stride, bd);
-  }
 }
 
 void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
diff --git a/av1/common/scan.h b/av1/common/scan.h
index 407c9ec..af39993 100644
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@@ -82,6 +82,10 @@
 
 static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
                                          TX_TYPE tx_type, int is_inter) {
+#if CONFIG_ADAPT_SCAN
+  (void)is_inter;
+  return &cm->fc->sc[tx_size][tx_type];
+#else  // CONFIG_ADAPT_SCAN
   (void)cm;
 #if CONFIG_EXT_TX
   return is_inter ? &av1_inter_scan_orders[tx_size][tx_type]
@@ -90,6 +94,7 @@
   (void)is_inter;
   return &av1_intra_scan_orders[tx_size][tx_type];
 #endif  // CONFIG_EXT_TX
+#endif  // CONFIG_ADAPT_SCAN
 }
 
 #ifdef __cplusplus
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index e12b9da..bd34d4d 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -303,6 +303,9 @@
     const int eob =
         av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size,
                                 tx_type, &max_scan_line, r, mbmi->segment_id);
+#if CONFIG_ADAPT_SCAN
+    av1_update_scan_count_facade(cm, tx_size, tx_type, pd->dqcoeff, eob);
+#endif
     if (eob)
       inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
                               max_scan_line, eob);
@@ -385,6 +388,9 @@
   const int eob =
       av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size, tx_type,
                               &max_scan_line, r, segment_id);
+#if CONFIG_ADAPT_SCAN
+  av1_update_scan_count_facade(cm, tx_size, tx_type, pd->dqcoeff, eob);
+#endif
   if (eob)
     inverse_transform_block(xd, plane, tx_type, tx_size,
                             &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 466cb9c..61a4890 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -535,6 +535,9 @@
   }
 
   av1_init_mv_probs(cm);
+#if CONFIG_ADAPT_SCAN
+  av1_init_scan_order(cm);
+#endif
   av1_initialize_rd_consts(cpi);
 
   // Tiling is ignored in the first pass.
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 7707e8f..821adba 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -534,6 +534,14 @@
 
   *tp = t;
 
+#if CONFIG_ADAPT_SCAN
+  // Since dqcoeff is not available here, we pass qcoeff into
+  // av1_update_scan_count_facade(). The update behavior should be the same
+  // because av1_update_scan_count_facade() only cares if coefficients are zero
+  // or not.
+  av1_update_scan_count_facade((AV1_COMMON *)cm, tx_size, tx_type, qcoeff, c);
+#endif
+
   av1_set_contexts(xd, pd, tx_size, c > 0, blk_col, blk_row);
 }