Merge "Move small fixes and refactoring for obmc pred from AV1" into nextgenv2
diff --git a/aom_scale/generic/aom_scale.c b/aom_scale/generic/aom_scale.c
index 28604ac..9007459 100644
--- a/aom_scale/generic/aom_scale.c
+++ b/aom_scale/generic/aom_scale.c
@@ -68,7 +68,6 @@
                           unsigned int source_scale, unsigned int source_length,
                           unsigned char *dest, int dest_step,
                           unsigned int dest_scale, unsigned int dest_length) {
-  const unsigned int source_pitch = source_step;
   const unsigned char *const dest_end = dest + dest_length * dest_step;
   (void)source_length;
   (void)source_scale;
@@ -81,9 +80,9 @@
   dest += dest_step;
 
   while (dest < dest_end) {
-    const unsigned int a = 3 * source[-source_pitch];
+    const unsigned int a = 3 * source[-source_step];
     const unsigned int b = 10 * source[0];
-    const unsigned int c = 3 * source[source_pitch];
+    const unsigned int c = 3 * source[source_step];
     *dest = (unsigned char)((8 + a + b + c) >> 4);
     source += source_step;
     dest += dest_step;
diff --git a/aomenc.c b/aomenc.c
index 497c8d5..e32a922 100644
--- a/aomenc.c
+++ b/aomenc.c
@@ -40,12 +40,12 @@
 #include "aom/aomdx.h"
 #endif
 
-#include "aom/aom_integer.h"
-#include "aom_ports/mem_ops.h"
-#include "aom_ports/aom_timer.h"
-#include "./rate_hist.h"
 #include "./aomstats.h"
+#include "./rate_hist.h"
 #include "./warnings.h"
+#include "aom/aom_integer.h"
+#include "aom_ports/aom_timer.h"
+#include "aom_ports/mem_ops.h"
 #if CONFIG_WEBM_IO
 #include "./webmenc.h"
 #endif
@@ -1860,6 +1860,7 @@
   uint64_t cx_time = 0;
   int stream_cnt = 0;
   int res = 0;
+  int profile_updated = 0;
 
   memset(&input, 0, sizeof(input));
   exec_name = argv_[0];
@@ -1963,6 +1964,54 @@
           { stream->config.cfg.g_input_bit_depth = input.bit_depth; });
     }
 
+    FOREACH_STREAM({
+      /* Reset per stream: warn only for streams that were upgraded. */
+      profile_updated = 0;
+      if (input.fmt != AOM_IMG_FMT_I420 && input.fmt != AOM_IMG_FMT_I42016) {
+        /* Upgrade a 4:2:0 profile when the input is not 4:2:0. */
+        switch (stream->config.cfg.g_profile) {
+          case 0:
+            stream->config.cfg.g_profile = 1;
+            profile_updated = 1;
+            break;
+          case 2:
+            stream->config.cfg.g_profile = 3;
+            profile_updated = 1;
+            break;
+          default: break;
+        }
+      }
+#if CONFIG_AOM_HIGHBITDEPTH
+      /* Automatically set the codec bit depth to match the input bit depth.
+       * Upgrade the profile if required. */
+      if (stream->config.cfg.g_input_bit_depth >
+          (unsigned int)stream->config.cfg.g_bit_depth) {
+        stream->config.cfg.g_bit_depth = stream->config.cfg.g_input_bit_depth;
+      }
+      if (stream->config.cfg.g_bit_depth > 8) {
+        switch (stream->config.cfg.g_profile) {
+          case 0:
+            stream->config.cfg.g_profile = 2;
+            profile_updated = 1;
+            break;
+          case 1:
+            stream->config.cfg.g_profile = 3;
+            profile_updated = 1;
+            break;
+          default: break;
+        }
+      }
+      if (stream->config.cfg.g_profile > 1) {
+        stream->config.use_16bit_internal = 1;
+      }
+#endif
+      if (profile_updated) {
+        fprintf(stderr,
+                "Warning: automatically upgrading to profile %d to "
+                "match input format.\n", stream->config.cfg.g_profile);
+      }
+    });
+
     FOREACH_STREAM(set_stream_dimensions(stream, input.width, input.height));
     FOREACH_STREAM(validate_stream_config(stream, &global));
 
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 63f7618..a7eb71e 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -425,7 +425,6 @@
   oxcf->key_freq = cfg->kf_max_dist;
 
   oxcf->speed = abs(extra_cfg->cpu_used);
-  oxcf->encode_breakout = extra_cfg->static_thresh;
   oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref;
 #if CONFIG_EXT_REFS
   oxcf->enable_auto_brf = extra_cfg->enable_auto_bwd_ref;
@@ -505,7 +504,6 @@
   printf("lag_in_frames: %d\n", oxcf->lag_in_frames);
   printf("enable_auto_arf: %d\n", oxcf->enable_auto_arf);
   printf("Version: %d\n", oxcf->Version);
-  printf("encode_breakout: %d\n", oxcf->encode_breakout);
   printf("error resilient: %d\n", oxcf->error_resilient_mode);
   printf("frame parallel detokenization: %d\n",
          oxcf->frame_parallel_decoding_mode);
@@ -953,11 +951,13 @@
     // failure condition, encoder setup is done fully in init() currently.
     if (res == AOM_CODEC_OK) {
 #if CONFIG_EXT_REFS
-      data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img);
+      data_sz = ALIGN_POWER_OF_TWO(ctx->cfg.g_w, 5) *
+                ALIGN_POWER_OF_TWO(ctx->cfg.g_h, 5) * get_image_bps(img);
 #else
       // There's no codec control for multiple alt-refs so check the encoder
       // instance for its status to determine the compressed data size.
-      data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
+      data_sz = ALIGN_POWER_OF_TWO(ctx->cfg.g_w, 5) *
+                ALIGN_POWER_OF_TWO(ctx->cfg.g_h, 5) * get_image_bps(img) / 8 *
                 (cpi->multi_arf_allowed ? 8 : 2);
 #endif  // CONFIG_EXT_REFS
       if (data_sz < kMinCompressedSize) data_sz = kMinCompressedSize;
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index bc1970c..8d7c7f8 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -307,6 +307,8 @@
   uint16_t n4_w, n4_h;
   // log2 of n4_w, n4_h
   uint8_t n4_wl, n4_hl;
+  // plane block width and height in pixels
+  uint8_t width, height;
 
 #if CONFIG_AOM_QM
   const qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index 528343b..88bfb0a 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -42,6 +42,12 @@
 };
 
 // Width/height lookup tables in units of various block sizes
+static const uint8_t block_size_wide[BLOCK_SIZES] = {
+  4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, IF_EXT_PARTITION(64, 128, 128)
+};
+static const uint8_t block_size_high[BLOCK_SIZES] = {
+  4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64, IF_EXT_PARTITION(128, 64, 128)
+};
 static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = {
   1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)
 };
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 7179e3d..885b782 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -12,6 +12,7 @@
 #include "av1/common/entropy.h"
 #include "av1/common/blockd.h"
 #include "av1/common/onyxc_int.h"
+#include "av1/common/scan.h"
 #include "av1/common/entropymode.h"
 #include "aom_mem/aom_mem.h"
 #include "aom/aom_integer.h"
@@ -2842,6 +2843,10 @@
 #endif  // CONFIG_RANS
 }
 
+#if CONFIG_ADAPT_SCAN
+#define ADAPT_SCAN_UPDATE_RATE_16 (1 << 13)
+#endif
+
 static void adapt_coef_probs(AV1_COMMON *cm, TX_SIZE tx_size,
                              unsigned int count_sat,
                              unsigned int update_factor) {
@@ -2881,9 +2886,13 @@
 }
 
 void av1_adapt_coef_probs(AV1_COMMON *cm) {
-  TX_SIZE t;
+  TX_SIZE tx_size;
   unsigned int count_sat, update_factor;
 
+#if CONFIG_ADAPT_SCAN
+  TX_TYPE tx_type;
+#endif
+
 #if CONFIG_ENTROPY
   if (cm->last_frame_type == KEY_FRAME) {
     update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY_BITS; /* adapt quickly */
@@ -2904,11 +2913,19 @@
     count_sat = COEF_COUNT_SAT;
   }
 #endif  // CONFIG_ENTROPY
-  for (t = TX_4X4; t <= TX_32X32; t++)
-    adapt_coef_probs(cm, t, count_sat, update_factor);
+  for (tx_size = TX_4X4; tx_size <= TX_32X32; tx_size++)
+    adapt_coef_probs(cm, tx_size, count_sat, update_factor);
 #if CONFIG_RANS
   av1_coef_pareto_cdfs(cm->fc);
 #endif  // CONFIG_RANS
+
+#if CONFIG_ADAPT_SCAN
+  for (tx_size = TX_4X4; tx_size < TX_SIZES; ++tx_size)
+    for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
+      av1_update_scan_prob(cm, tx_size, tx_type, ADAPT_SCAN_UPDATE_RATE_16);
+      av1_update_scan_order_facade(cm, tx_size, tx_type);
+    }
+#endif  // CONFIG_ADAPT_SCAN
 }
 
 #if CONFIG_ENTROPY
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index 15b50db..28556fc 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -288,6 +288,10 @@
 
 #endif  // CONFIG_ENTROPY
 
+#if CONFIG_ADAPT_SCAN
+#define ADAPT_SCAN_UPDATE_RATE_16 (1 << 13)
+#endif
+
 static INLINE aom_prob av1_merge_probs(aom_prob pre_prob,
                                        const unsigned int ct[2],
                                        unsigned int count_sat,
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index e812f15..e25dcf8 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -12,6 +12,7 @@
 #include "aom_mem/aom_mem.h"
 
 #include "av1/common/reconinter.h"
+#include "av1/common/scan.h"
 #include "av1/common/onyxc_int.h"
 #include "av1/common/seg_common.h"
 
@@ -1755,6 +1756,9 @@
   av1_default_coef_probs(cm);
   init_mode_probs(cm->fc);
   av1_init_mv_probs(cm);
+#if CONFIG_ADAPT_SCAN
+  av1_init_scan_order(cm);
+#endif
   cm->fc->initialized = 1;
 
   if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
diff --git a/av1/common/enums.h b/av1/common/enums.h
index b02c814..0a1f7a3 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -58,6 +58,10 @@
 #define MAX_TILE_COLS 64
 #endif  // CONFIG_EXT_TILE
 
+#if CONFIG_VAR_TX
+#define MAX_VARTX_DEPTH 2
+#endif
+
 // Bitstream profiles indicated by 2-3 bits in the uncompressed header.
 // 00: Profile 0.  8-bit 4:2:0 only.
 // 10: Profile 1.  8-bit 4:4:4, 4:2:2, and 4:4:0.
diff --git a/av1/common/idct.c b/av1/common/idct.c
index eedbc79..4f33f9b 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -837,8 +837,10 @@
   if (eob == 1)
     // DC only DCT coefficient
     aom_idct8x8_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
   else if (eob <= 12)
     aom_idct8x8_12_add(input, dest, stride);
+#endif
   else
     aom_idct8x8_64_add(input, dest, stride);
 }
@@ -849,19 +851,22 @@
    * coefficients. Use eobs to separate different cases. */
   if (eob == 1) /* DC only DCT coefficient. */
     aom_idct16x16_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
   else if (eob <= 10)
     aom_idct16x16_10_add(input, dest, stride);
+#endif
   else
     aom_idct16x16_256_add(input, dest, stride);
 }
 
 void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
                        int eob) {
-  if (eob == 1)
-    aom_idct32x32_1_add(input, dest, stride);
+  if (eob == 1) aom_idct32x32_1_add(input, dest, stride);
+#if !CONFIG_ADAPT_SCAN
   else if (eob <= 34)
     // non-zero coeff only in upper-left 8x8
     aom_idct32x32_34_add(input, dest, stride);
+#endif
   else
     aom_idct32x32_1024_add(input, dest, stride);
 }
@@ -1659,13 +1664,13 @@
   // TODO(yunqingwang): "eobs = 1" case is also handled in av1_short_idct8x8_c.
   // Combine that with code here.
   // DC only DCT coefficient
-  if (eob == 1) {
-    aom_highbd_idct8x8_1_add(input, dest, stride, bd);
-  } else if (eob <= 10) {
+  if (eob == 1) aom_highbd_idct8x8_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 10)
     aom_highbd_idct8x8_10_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
     aom_highbd_idct8x8_64_add(input, dest, stride, bd);
-  }
 }
 
 void av1_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
@@ -1673,25 +1678,25 @@
   // The calculation can be simplified if there are not many non-zero dct
   // coefficients. Use eobs to separate different cases.
   // DC only DCT coefficient.
-  if (eob == 1) {
-    aom_highbd_idct16x16_1_add(input, dest, stride, bd);
-  } else if (eob <= 10) {
+  if (eob == 1) aom_highbd_idct16x16_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 10)
     aom_highbd_idct16x16_10_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
     aom_highbd_idct16x16_256_add(input, dest, stride, bd);
-  }
 }
 
 void av1_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, int bd) {
   // Non-zero coeff only in upper-left 8x8
-  if (eob == 1) {
-    aom_highbd_idct32x32_1_add(input, dest, stride, bd);
-  } else if (eob <= 34) {
+  if (eob == 1) aom_highbd_idct32x32_1_add(input, dest, stride, bd);
+#if !CONFIG_ADAPT_SCAN
+  else if (eob <= 34)
     aom_highbd_idct32x32_34_add(input, dest, stride, bd);
-  } else {
+#endif
+  else
     aom_highbd_idct32x32_1024_add(input, dest, stride, bd);
-  }
 }
 
 void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index b6e73cd..be1cbc1 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -531,6 +531,8 @@
     xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y;
     xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x;
     xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y;
+    xd->plane[i].width = xd->plane[i].n4_w * 4;
+    xd->plane[i].height = xd->plane[i].n4_h * 4;
   }
 }
 
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 8056ead..6c4ae2a 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -719,16 +719,16 @@
           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
             high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
                                  subpel_x, subpel_y, sf, x_step, y_step, ref,
-                                 &mi->mbmi.interp_filter, xs, ys, xd->bd);
+                                 mi->mbmi.interp_filter, xs, ys, xd->bd);
           } else {
             inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
                             subpel_x, subpel_y, sf, x_step, y_step, ref,
-                            &mi->mbmi.interp_filter, xs, ys);
+                            mi->mbmi.interp_filter, xs, ys);
           }
 #else
           inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x,
                           subpel_y, sf, x_step, y_step, ref,
-                          &mi->mbmi.interp_filter, xs, ys);
+                          mi->mbmi.interp_filter, xs, ys);
 #endif
         }
       }
@@ -802,7 +802,7 @@
                      pre_buf->buf0, pre_buf->width, pre_buf->height,
                      pre_buf->stride, dst, (mi_x >> pd->subsampling_x) + x,
                      (mi_y >> pd->subsampling_y) + y, w, h, dst_buf->stride,
-                     pd->subsampling_x, pd->subsampling_y, xs, ys);
+                     pd->subsampling_x, pd->subsampling_y, xs, ys, ref);
     else
 #endif  // CONFIG_GLOBAL_MOTION
 #endif  // CONFIG_EXT_INTER
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index cf4bedf..cfd283f 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -283,7 +283,7 @@
     const int wl = mi_width_log2_lookup[bsize];
     const int hl = mi_height_log2_lookup[bsize];
     const int h = 1 << (hl + 1 - ss_y);
-    const int step = 1 << txsz;
+    const int step = tx_size_wide_unit[txsz];
     const uint8_t *order = orders[bsize];
     int my_order, bl_order;
 
@@ -671,7 +671,7 @@
                          INTRA_FILTER filter_type) {
   const int dx = get_dx(angle);
   const int dy = get_dy(angle);
-  const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const int bs = tx_size_wide[tx_size];
   assert(angle > 0 && angle < 270);
 
   if (angle > 0 && angle < 90) {
@@ -1200,7 +1200,7 @@
   DECLARE_ALIGNED(16, uint16_t, above_data[MAX_SB_SIZE + 16]);
   uint16_t *above_row = above_data + 16;
   const uint16_t *const_above_row = above_row;
-  const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const int bs = tx_size_wide[tx_size];
   int need_left = extend_modes[mode] & NEED_LEFT;
   int need_above = extend_modes[mode] & NEED_ABOVE;
   int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
@@ -1361,7 +1361,7 @@
   DECLARE_ALIGNED(16, uint8_t, above_data[MAX_SB_SIZE + 16]);
   uint8_t *above_row = above_data + 16;
   const uint8_t *const_above_row = above_row;
-  const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
+  const int bs = tx_size_wide[tx_size];
   int need_left = extend_modes[mode] & NEED_LEFT;
   int need_above = extend_modes[mode] & NEED_ABOVE;
   int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
@@ -1507,25 +1507,21 @@
   }
 }
 
-void av1_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
+void av1_predict_intra_block(const MACROBLOCKD *xd, int wpx, int hpx,
                              TX_SIZE tx_size, PREDICTION_MODE mode,
                              const uint8_t *ref, int ref_stride, uint8_t *dst,
                              int dst_stride, int col_off, int row_off,
                              int plane) {
   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int txw = num_4x4_blocks_wide_txsize_lookup[tx_size];
-  const int txh = num_4x4_blocks_high_txsize_lookup[tx_size];
+  const int txw = tx_size_wide_unit[tx_size];
+  const int txh = tx_size_high_unit[tx_size];
   const int have_top = row_off || xd->up_available;
   const int have_left = col_off || xd->left_available;
   const int x = col_off * 4;
   const int y = row_off * 4;
-  const int bw = pd->subsampling_x ? 1 << bwl_in : AOMMAX(2, 1 << bwl_in);
-  const int bh = pd->subsampling_y ? 1 << bhl_in : AOMMAX(2, 1 << bhl_in);
   const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
   const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2);
-  const int wpx = 4 * bw;
-  const int hpx = 4 * bh;
   const int txwpx = 4 * txw;
   const int txhpx = 4 * txh;
   // Distance between the right edge of this prediction block to
@@ -1553,8 +1549,8 @@
 
 #if CONFIG_PALETTE
   if (xd->mi[0]->mbmi.palette_mode_info.palette_size[plane != 0] > 0) {
-    const int bs = 4 * num_4x4_blocks_wide_txsize_lookup[tx_size];
-    const int stride = 4 * (1 << bwl_in);
+    const int bs = tx_size_wide[tx_size];
+    const int stride = wpx;
     int r, c;
     uint8_t *map = NULL;
 #if CONFIG_AOM_HIGHBITDEPTH
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
index 7778874..23bad1c 100644
--- a/av1/common/reconintra.h
+++ b/av1/common/reconintra.h
@@ -21,7 +21,7 @@
 
 void av1_init_intra_predictors(void);
 
-void av1_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, int bhl_in,
+void av1_predict_intra_block(const MACROBLOCKD *xd, int bw, int bh,
                              TX_SIZE tx_size, PREDICTION_MODE mode,
                              const uint8_t *ref, int ref_stride, uint8_t *dst,
                              int dst_stride, int aoff, int loff, int plane);
diff --git a/av1/common/scan.h b/av1/common/scan.h
index 407c9ec..af39993 100644
--- a/av1/common/scan.h
+++ b/av1/common/scan.h
@@ -82,6 +82,10 @@
 
 static INLINE const SCAN_ORDER *get_scan(const AV1_COMMON *cm, TX_SIZE tx_size,
                                          TX_TYPE tx_type, int is_inter) {
+#if CONFIG_ADAPT_SCAN
+  (void)is_inter;
+  return &cm->fc->sc[tx_size][tx_type];
+#else  // CONFIG_ADAPT_SCAN
   (void)cm;
 #if CONFIG_EXT_TX
   return is_inter ? &av1_inter_scan_orders[tx_size][tx_type]
@@ -90,6 +94,7 @@
   (void)is_inter;
   return &av1_intra_scan_orders[tx_size][tx_type];
 #endif  // CONFIG_EXT_TX
+#endif  // CONFIG_ADAPT_SCAN
 }
 
 #ifdef __cplusplus
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index fc632c3..e5ed39d 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -487,7 +487,7 @@
                               int p_row, int p_width, int p_height,
                               int p_stride, int subsampling_x,
                               int subsampling_y, int x_scale, int y_scale,
-                              int bd) {
+                              int bd, int ref_frm) {
   int i, j;
   ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
   uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
@@ -502,8 +502,15 @@
                     subsampling_y);
       out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
       out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
-      pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
-          ref, out[0], out[1], width, height, stride, bd);
+      if (ref_frm)
+        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
+            pred[(j - p_col) + (i - p_row) * p_stride] +
+                highbd_warp_interpolate(ref, out[0], out[1], width, height,
+                                        stride, bd),
+            1);
+      else
+        pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
+            ref, out[0], out[1], width, height, stride, bd);
     }
   }
 }
@@ -542,7 +549,7 @@
                        int height, int stride, uint8_t *pred, int p_col,
                        int p_row, int p_width, int p_height, int p_stride,
                        int subsampling_x, int subsampling_y, int x_scale,
-                       int y_scale) {
+                       int y_scale, int ref_frm) {
   int i, j;
   ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
   if (projectpoints == NULL) return;
@@ -555,8 +562,14 @@
                     subsampling_y);
       out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
       out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
-      pred[(j - p_col) + (i - p_row) * p_stride] =
-          warp_interpolate(ref, out[0], out[1], width, height, stride);
+      if (ref_frm)
+        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
+            pred[(j - p_col) + (i - p_row) * p_stride] +
+                warp_interpolate(ref, out[0], out[1], width, height, stride),
+            1);
+      else
+        pred[(j - p_col) + (i - p_row) * p_stride] =
+            warp_interpolate(ref, out[0], out[1], width, height, stride);
     }
   }
 }
@@ -587,17 +600,17 @@
                     uint8_t *ref, int width, int height, int stride,
                     uint8_t *pred, int p_col, int p_row, int p_width,
                     int p_height, int p_stride, int subsampling_x,
-                    int subsampling_y, int x_scale, int y_scale) {
+                    int subsampling_y, int x_scale, int y_scale, int ref_frm) {
 #if CONFIG_AOM_HIGHBITDEPTH
   if (use_hbd)
     highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
                       p_width, p_height, p_stride, subsampling_x, subsampling_y,
-                      x_scale, y_scale, bd);
+                      x_scale, y_scale, bd, ref_frm);
   else
 #endif  // CONFIG_AOM_HIGHBITDEPTH
     warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
                p_height, p_stride, subsampling_x, subsampling_y, x_scale,
-               y_scale);
+               y_scale, ref_frm);
 }
 
 void av1_integerize_model(const double *model, TransformationType wmtype,
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index da92599..e7b9038 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -64,7 +64,7 @@
                     uint8_t *ref, int width, int height, int stride,
                     uint8_t *pred, int p_col, int p_row, int p_width,
                     int p_height, int p_stride, int subsampling_x,
-                    int subsampling_y, int x_scale, int y_scale);
+                    int subsampling_y, int x_scale, int y_scale, int ref_frm);
 
 // Integerize model into the WarpedMotionParams structure
 void av1_integerize_model(const double *model, TransformationType wmtype,
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 38e2539..e39d08e 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -293,7 +293,7 @@
   if (mbmi->sb_type < BLOCK_8X8)
     if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
 
-  av1_predict_intra_block(xd, pd->n4_wl, pd->n4_hl, tx_size, mode, dst,
+  av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
                           pd->dst.stride, dst, pd->dst.stride, col, row, plane);
 
   if (!mbmi->skip) {
@@ -303,6 +303,9 @@
     const int eob =
         av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size,
                                 tx_type, &max_scan_line, r, mbmi->segment_id);
+#if CONFIG_ADAPT_SCAN
+    av1_update_scan_count_facade(cm, tx_size, tx_type, pd->dqcoeff, eob);
+#endif
     if (eob)
       inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride,
                               max_scan_line, eob);
@@ -312,7 +315,7 @@
 #if CONFIG_VAR_TX
 static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd,
                                   aom_reader *r, MB_MODE_INFO *const mbmi,
-                                  int plane, BLOCK_SIZE plane_bsize, int block,
+                                  int plane, BLOCK_SIZE plane_bsize,
                                   int blk_row, int blk_col, TX_SIZE tx_size,
                                   int *eob_total) {
   const struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -334,7 +337,8 @@
 
   if (tx_size == plane_tx_size) {
     PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
-    TX_TYPE tx_type = get_tx_type(plane_type, xd, block, plane_tx_size);
+    int block_idx = (blk_row << 1) + blk_col;
+    TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx, plane_tx_size);
     const SCAN_ORDER *sc = get_scan(cm, plane_tx_size, tx_type, 1);
     int16_t max_scan_line = 0;
     const int eob =
@@ -355,13 +359,11 @@
     for (i = 0; i < 4; ++i) {
       const int offsetr = blk_row + ((i >> 1) << bsl);
       const int offsetc = blk_col + ((i & 0x01) << bsl);
-      int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
 
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
-      decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize,
-                            block + i * step, offsetr, offsetc, tx_size - 1,
-                            eob_total);
+      decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, offsetr,
+                            offsetc, tx_size - 1, eob_total);
     }
   }
 }
@@ -385,6 +387,9 @@
   const int eob =
       av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size, tx_type,
                               &max_scan_line, r, segment_id);
+#if CONFIG_ADAPT_SCAN
+  av1_update_scan_count_facade(cm, tx_size, tx_type, pd->dqcoeff, eob);
+#endif
   if (eob)
     inverse_transform_block(xd, plane, tx_type, tx_size,
                             &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
@@ -1286,8 +1291,6 @@
         const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
         const int bw_var_tx = tx_size_high_unit[max_tx_size];
         const int bh_var_tx = tx_size_wide_unit[max_tx_size];
-        const int step = num_4x4_blocks_txsize_lookup[max_tx_size];
-        int block = 0;
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
         if (is_rect_tx(mbmi->tx_size)) {
           const TX_SIZE tx_size =
@@ -1309,13 +1312,10 @@
                                                   plane, row, col, tx_size);
         } else {
 #endif
-          for (row = 0; row < num_4x4_h; row += bh_var_tx) {
-            for (col = 0; col < num_4x4_w; col += bw_var_tx) {
-              decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, block,
-                                    row, col, max_tx_size, &eobtotal);
-              block += step;
-            }
-          }
+          for (row = 0; row < num_4x4_h; row += bh_var_tx)
+            for (col = 0; col < num_4x4_w; col += bw_var_tx)
+              decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, row,
+                                    col, max_tx_size, &eobtotal);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
         }
 #endif
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 0ad53fa..3993e72 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -294,7 +294,7 @@
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
-  if (depth == 2) {
+  if (depth == MAX_VARTX_DEPTH) {
     int idx, idy;
     inter_tx_size[0][0] = tx_size;
     for (idy = 0; idy < num_4x4_blocks_high_txsize_lookup[tx_size] / 2; ++idy)
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index d8e12d5..df27fbc 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -70,37 +70,9 @@
     };
 #endif  // CONFIG_EXT_INTER
 #if CONFIG_PALETTE
-static const struct av1_token palette_size_encodings[] = {
-  { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 30, 5 }, { 62, 6 }, { 63, 6 },
-};
-static const struct av1_token
-    palette_color_encodings[PALETTE_MAX_SIZE - 1][PALETTE_MAX_SIZE] = {
-      { { 0, 1 }, { 1, 1 } },                                  // 2 colors
-      { { 0, 1 }, { 2, 2 }, { 3, 2 } },                        // 3 colors
-      { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } },              // 4 colors
-      { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 } },  // 5 colors
-      { { 0, 1 },
-        { 2, 2 },
-        { 6, 3 },
-        { 14, 4 },
-        { 30, 5 },
-        { 31, 5 } },  // 6 colors
-      { { 0, 1 },
-        { 2, 2 },
-        { 6, 3 },
-        { 14, 4 },
-        { 30, 5 },
-        { 62, 6 },
-        { 63, 6 } },  // 7 colors
-      { { 0, 1 },
-        { 2, 2 },
-        { 6, 3 },
-        { 14, 4 },
-        { 30, 5 },
-        { 62, 6 },
-        { 126, 7 },
-        { 127, 7 } },  // 8 colors
-    };
+static struct av1_token palette_size_encodings[PALETTE_MAX_SIZE - 1];
+static struct av1_token palette_color_encodings[PALETTE_MAX_SIZE - 1]
+                                               [PALETTE_MAX_SIZE];
 #endif  // CONFIG_PALETTE
 static const struct av1_token tx_size_encodings[TX_SIZES - 1][TX_SIZES] = {
   { { 0, 1 }, { 1, 1 } },                      // Max tx_size is 8X8
@@ -145,8 +117,10 @@
 #endif  // CONFIG_LOOP_RESTORATION
 
 void av1_encode_token_init(void) {
-#if CONFIG_EXT_TX
+#if CONFIG_EXT_TX || CONFIG_PALETTE
   int s;
+#endif  // CONFIG_EXT_TX || CONFIG_PALETTE
+#if CONFIG_EXT_TX
   for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
     av1_tokens_from_tree(ext_tx_inter_encodings[s], av1_ext_tx_inter_tree[s]);
   }
@@ -163,6 +137,13 @@
   av1_tokens_from_tree(inter_mode_encodings, av1_inter_mode_tree);
 #endif
 
+#if CONFIG_PALETTE
+  av1_tokens_from_tree(palette_size_encodings, av1_palette_size_tree);
+  for (s = 0; s < PALETTE_MAX_SIZE - 1; ++s) {
+    av1_tokens_from_tree(palette_color_encodings[s], av1_palette_color_tree[s]);
+  }
+#endif  // CONFIG_PALETTE
+
 #if CONFIG_EXT_INTRA
   av1_tokens_from_tree(intra_filter_encodings, av1_intra_filter_tree);
 #endif  // CONFIG_EXT_INTRA
@@ -379,7 +360,7 @@
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
-  if (depth == 2) {
+  if (depth == MAX_VARTX_DEPTH) {
     txfm_partition_update(xd->above_txfm_context + tx_col,
                           xd->left_txfm_context + tx_row, tx_size);
     return;
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 0f9ef46..5c8a2f6 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -80,8 +80,7 @@
   MACROBLOCKD e_mbd;
   MB_MODE_INFO_EXT *mbmi_ext;
   int skip_block;
-  int select_tx_size;
-  int q_index;
+  int qindex;
 
   // The equivalent error at the current rdmult of one whole bit (not one
   // bitcost unit).
@@ -157,8 +156,6 @@
 
   int skip;
 
-  int encode_breakout;
-
   // note that token_costs is the cost when eob node is skipped
   av1_coeff_cost token_costs[TX_SIZES];
 
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 410498b..4041b15 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -324,11 +324,8 @@
       mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
     }
     av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
-
-    x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
   } else {
     mbmi->segment_id = 0;
-    x->encode_breakout = cpi->encode_breakout;
   }
 
 #if CONFIG_SUPERTX
@@ -404,7 +401,6 @@
 
   if (!seg->enabled) {
     seg_id_supertx = 0;
-    x->encode_breakout = cpi->encode_breakout;
   } else {
     // Find the minimum segment_id
     for (r = 0; r < mih; r++)
@@ -415,7 +411,6 @@
 
     // Initialize plane quantisers
     av1_init_plane_quantizers(cpi, x, seg_id_supertx);
-    x->encode_breakout = cpi->segment_encode_breakout[seg_id_supertx];
   }
 
   // Assign the the segment_id back to segment_id_supertx
@@ -852,7 +847,6 @@
 
     if (cyclic_refresh_segment_id_boosted(segment_id)) {
       int q = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
-      assert(q == xd->qindex[segment_id]);
       set_vbp_thresholds(cpi, thresholds, q);
     }
   }
@@ -1606,7 +1600,6 @@
   av1_init_plane_quantizers(cpi, x, segment_id);
   aom_clear_system_state();
   segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
-  assert(segment_qindex == x->e_mbd.qindex[segment_id]);
   return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
 }
 
@@ -1690,7 +1683,6 @@
       mbmi->segment_id = av1_vaq_segment_id(energy);
       // Re-initialise quantiser
       av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
-      x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
     }
     x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
   } else if (aq_mode == COMPLEXITY_AQ) {
@@ -4675,7 +4667,6 @@
                            : cm->base_qindex;
     xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
                       cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
-    xd->qindex[i] = qindex;
   }
 
   if (!cm->seg.enabled && xd->lossless[0]) x->optimize = 0;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 8914ba5..6b7e72c 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -95,8 +95,7 @@
 #endif
   const int shift = get_tx_scale(xd, tx_type, tx_size);
 #if CONFIG_NEW_QUANT
-  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
-                                   ref, plane_type);
+  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
   const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
 #else
   const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
@@ -123,8 +122,7 @@
   int shortcut = 0;
   int next_shortcut = 0;
 
-  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
-         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+  assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
 
   token_costs += band;
 
@@ -518,8 +516,7 @@
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
-                                   is_inter, plane_type);
+  int dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
   uint16_t *const eob = &p->eobs[block];
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
@@ -527,8 +524,7 @@
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
-  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
-         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+  assert((x->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
 
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
@@ -588,8 +584,7 @@
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
   const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter);
-  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
-                                   is_inter, plane_type);
+  int dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -600,8 +595,7 @@
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
-  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
-         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+  assert((x->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
 
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
@@ -665,13 +659,11 @@
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
   const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
-  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
-                                   is_inter, plane_type);
+  int dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
-  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
-         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+  assert((x->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
 
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
@@ -730,13 +722,11 @@
   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int16_t *src_diff;
   const int is_inter = is_inter_block(&xd->mi[0]->mbmi);
-  int dq = get_dq_profile_from_ctx(xd->qindex[xd->mi[0]->mbmi.segment_id], ctx,
-                                   is_inter, plane_type);
+  int dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
 
   FWD_TXFM_PARAM fwd_txfm_param;
 
-  assert((xd->qindex[xd->mi[0]->mbmi.segment_id] == 0) ^
-         (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
+  assert((x->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
 
   fwd_txfm_param.tx_type = tx_type;
   fwd_txfm_param.tx_size = tx_size;
@@ -1077,16 +1067,14 @@
   PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
   const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
   PREDICTION_MODE mode;
-  const int bwl = b_width_log2_lookup[plane_bsize];
-  const int bhl = b_height_log2_lookup[plane_bsize];
-  const int diff_stride = 4 * (1 << bwl);
+  const int diff_stride = block_size_wide[plane_bsize];
   uint8_t *src, *dst;
   int16_t *src_diff;
   uint16_t *eob = &p->eobs[block];
   const int src_stride = p->src.stride;
   const int dst_stride = pd->dst.stride;
-  const int tx1d_width = num_4x4_blocks_wide_txsize_lookup[tx_size] << 2;
-  const int tx1d_height = num_4x4_blocks_high_txsize_lookup[tx_size] << 2;
+  const int tx1d_width = tx_size_wide[tx_size];
+  const int tx1d_height = tx_size_high[tx_size];
   ENTROPY_CONTEXT *a = NULL, *l = NULL;
   int ctx;
 
@@ -1097,10 +1085,9 @@
   dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
   src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
   src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
-
   mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
-  av1_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, dst,
-                          dst_stride, blk_col, blk_row, plane);
+  av1_predict_intra_block(xd, pd->width, pd->height, tx_size, mode, dst,
+                          dst_stride, dst, dst_stride, blk_col, blk_row, plane);
 #if CONFIG_AOM_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     aom_highbd_subtract_block(tx1d_height, tx1d_width, src_diff, diff_stride,
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 33c536d..629eb46 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1897,14 +1897,6 @@
   av1_reset_segment_features(cm);
   av1_set_high_precision_mv(cpi, 0);
 
-  {
-    int i;
-
-    for (i = 0; i < MAX_SEGMENTS; i++)
-      cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
-  }
-  cpi->encode_breakout = cpi->oxcf.encode_breakout;
-
   set_rc_buffer_sizes(rc, &cpi->oxcf);
 
   // Under a configuration change, where maximum_buffer_size may change,
@@ -2167,8 +2159,6 @@
   kf_list = fopen("kf_list.stt", "w");
 #endif
 
-  cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
-
   if (oxcf->pass == 1) {
     av1_init_first_pass(cpi);
   } else if (oxcf->pass == 2) {
@@ -4760,8 +4750,6 @@
 
 static void Pass2Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest,
                         unsigned int *frame_flags) {
-  cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
-
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
 
 #if CONFIG_EXT_REFS
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 0c66905..8738609 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -86,15 +86,6 @@
 } FRAME_CONTEXT_INDEX;
 
 typedef enum {
-  // encode_breakout is disabled.
-  ENCODE_BREAKOUT_DISABLED = 0,
-  // encode_breakout is enabled.
-  ENCODE_BREAKOUT_ENABLED = 1,
-  // encode_breakout is enabled with small max_thresh limit.
-  ENCODE_BREAKOUT_LIMITED = 2
-} ENCODE_BREAKOUT_TYPE;
-
-typedef enum {
   NORMAL = 0,
   FOURFIVE = 1,
   THREEFIVE = 2,
@@ -223,8 +214,6 @@
   int enable_auto_brf;  // (b)ackward (r)ef (f)rame
 #endif                  // CONFIG_EXT_REFS
 
-  int encode_breakout;  // early breakout : for video conf recommend 800
-
   /* Bitfield defining the error resiliency features to enable.
    * Can provide decodable frames after losses in previous
    * frames and decodable partitions after losses in the same frame.
@@ -449,18 +438,8 @@
 
   int allow_comp_inter_inter;
 
-  // Default value is 1. From first pass stats, encode_breakout may be disabled.
-  ENCODE_BREAKOUT_TYPE allow_encode_breakout;
-
-  // Get threshold from external input. A suggested threshold is 800 for HD
-  // clips, and 300 for < HD clips.
-  int encode_breakout;
-
   uint8_t *segmentation_map;
 
-  // segment threashold for encode breakout
-  int segment_encode_breakout[MAX_SEGMENTS];
-
   CYCLIC_REFRESH *cyclic_refresh;
   ActiveMap active_map;
 
@@ -751,12 +730,10 @@
 #endif  // CONFIG_EXT_REFS
 
 static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols) {
-  // TODO(JBB): double check we can't exceed this token count if we have a
-  // 32x32 transform crossing a boundary at a multiple of 16.
-  // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
-  // resolution. We assume up to 1 token per pixel, and then allow
-  // a head room of 1 EOSB token per 8x8 block per plane.
-  return mb_rows * mb_cols * (16 * 16 + 4) * 3;
+  // We assume 3 planes all at full resolution. We assume up to 1 token per
+  // pixel, and then allow a head room of 1 EOSB token per 4x4 block per plane,
+  // plus EOSB_TOKEN per plane.
+  return mb_rows * mb_cols * (16 * 16 + 17) * 3;
 }
 
 // Get the allocated token size for a tile. It does the same calculation as in
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 466cb9c..0f7fcca 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -535,6 +535,9 @@
   }
 
   av1_init_mv_probs(cm);
+#if CONFIG_ADAPT_SCAN
+  av1_init_scan_order(cm);
+#endif
   av1_initialize_rd_consts(cpi);
 
   // Tiling is ignored in the first pass.
@@ -579,13 +582,15 @@
       set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize],
                      mb_col << 1, num_8x8_blocks_wide_lookup[bsize],
                      cm->mi_rows, cm->mi_cols);
+      set_plane_n4(xd, num_8x8_blocks_wide_lookup[bsize],
+                   num_8x8_blocks_high_lookup[bsize],
+                   mi_width_log2_lookup[bsize], mi_height_log2_lookup[bsize]);
 
       // Do intra 16x16 prediction.
       xd->mi[0]->mbmi.segment_id = 0;
 #if CONFIG_SUPERTX
       xd->mi[0]->mbmi.segment_id_supertx = 0;
 #endif  // CONFIG_SUPERTX
-      xd->qindex[xd->mi[0]->mbmi.segment_id] = qindex;
       xd->lossless[xd->mi[0]->mbmi.segment_id] = (qindex == 0);
       xd->mi[0]->mbmi.mode = DC_PRED;
       xd->mi[0]->mbmi.tx_size =
diff --git a/av1/encoder/mbgraph.c b/av1/encoder/mbgraph.c
index 9bbed2b..1fd1682 100644
--- a/av1/encoder/mbgraph.c
+++ b/av1/encoder/mbgraph.c
@@ -149,7 +149,7 @@
     unsigned int err;
 
     xd->mi[0]->mbmi.mode = mode;
-    av1_predict_intra_block(xd, 2, 2, TX_16X16, mode, x->plane[0].src.buf,
+    av1_predict_intra_block(xd, 16, 16, TX_16X16, mode, x->plane[0].src.buf,
                             x->plane[0].src.stride, xd->plane[0].dst.buf,
                             xd->plane[0].dst.stride, 0, 0, 0);
     err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
diff --git a/av1/encoder/quantize.c b/av1/encoder/quantize.c
index db2fdb8..771f94b 100644
--- a/av1/encoder/quantize.c
+++ b/av1/encoder/quantize.c
@@ -1293,11 +1293,11 @@
   }
 
   x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
-  x->q_index = qindex;
+  x->qindex = qindex;
 
   set_error_per_bit(x, rdmult);
 
-  av1_initialize_me_consts(cpi, x, x->q_index);
+  av1_initialize_me_consts(cpi, x, qindex);
 }
 
 void av1_frame_init_quantizer(AV1_COMP *cpi) {
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index faf330b..fdf9b51 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1528,6 +1528,10 @@
 #if CONFIG_EXT_TX
   int ext_tx_set;
 #endif  // CONFIG_EXT_TX
+  *distortion = INT64_MAX;
+  *rate = INT_MAX;
+  *skip = 0;
+  *sse = INT64_MAX;
 
   mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
 
@@ -1591,10 +1595,17 @@
       if (this_rd < best_rd) {
         best_rd = this_rd;
         best_tx_type = mbmi->tx_type;
+        *distortion = d;
+        *rate = r;
+        *skip = s;
+        *sse = psse;
       }
     }
+  } else {
+    mbmi->tx_type = DCT_DCT;
+    txfm_rd_in_plane(x, cpi, rate, distortion, skip, sse, ref_best_rd, 0, bs,
+                     mbmi->tx_size, cpi->sf.use_fast_coef_costing);
   }
-
 #else   // CONFIG_EXT_TX
   if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
     for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
@@ -1628,14 +1639,19 @@
       if (this_rd < best_rd) {
         best_rd = this_rd;
         best_tx_type = mbmi->tx_type;
+        *distortion = d;
+        *rate = r;
+        *skip = s;
+        *sse = psse;
       }
     }
+  } else {
+    mbmi->tx_type = DCT_DCT;
+    txfm_rd_in_plane(x, cpi, rate, distortion, skip, sse, ref_best_rd, 0, bs,
+                     mbmi->tx_size, cpi->sf.use_fast_coef_costing);
   }
 #endif  // CONFIG_EXT_TX
   mbmi->tx_type = best_tx_type;
-
-  txfm_rd_in_plane(x, cpi, rate, distortion, skip, sse, ref_best_rd, 0, bs,
-                   mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 }
 
 static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
@@ -1944,8 +1960,9 @@
           int16_t *const src_diff =
               av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
           xd->mi[0]->bmi[block].as_mode = mode;
-          av1_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
-                                  dst_stride, col + idx, row + idy, 0);
+          av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
+                                  dst_stride, dst, dst_stride, col + idx,
+                                  row + idy, 0);
           aom_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
                                     dst_stride, xd->bd);
           if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
@@ -2064,8 +2081,9 @@
         int16_t *const src_diff =
             av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
         xd->mi[0]->bmi[block].as_mode = mode;
-        av1_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst,
-                                dst_stride, col + idx, row + idy, 0);
+        av1_predict_intra_block(xd, pd->width, pd->height, TX_4X4, mode, dst,
+                                dst_stride, dst, dst_stride, col + idx,
+                                row + idy, 0);
         aom_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
 
         if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
@@ -3116,13 +3134,13 @@
       *skip = 0;
     }
 
-    if (tx_size > TX_4X4 && depth < 2)
+    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
       *rate += av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
     this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
     tmp_eob = p->eobs[block];
   }
 
-  if (tx_size > TX_4X4 && depth < 2) {
+  if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
     BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
     int bsl = b_height_log2_lookup[bsize];
     int sub_step = num_4x4_blocks_txsize_lookup[tx_size - 1];
@@ -9373,7 +9391,7 @@
     int best_rate_nocoef;
 #endif
     int64_t distortion2 = 0, distortion_y = 0, dummy_rd = best_rd, this_rd;
-    int skippable = 0, rate_overhead = 0;
+    int skippable = 0, rate_overhead_palette = 0;
     TX_SIZE best_tx_size, uv_tx;
     TX_TYPE best_tx_type;
     PALETTE_MODE_INFO palette_mode_info;
@@ -9381,13 +9399,12 @@
         x->palette_buffer->best_palette_color_map;
     uint8_t *const color_map = xd->plane[0].color_index_map;
 
-    rate_overhead = 0;
     mbmi->mode = DC_PRED;
     mbmi->uv_mode = DC_PRED;
     mbmi->ref_frame[0] = INTRA_FRAME;
     mbmi->ref_frame[1] = NONE;
     palette_mode_info.palette_size[0] = 0;
-    rate_overhead = rd_pick_palette_intra_sby(
+    rate_overhead_palette = rd_pick_palette_intra_sby(
         cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
         &palette_mode_info, best_palette_color_map, &best_tx_size,
         &best_tx_type, &mode_selected, &dummy_rd);
@@ -9436,7 +9453,7 @@
 #endif  // CONFIG_FILTER_INTRA
     skippable = skippable && skip_uvs[uv_tx];
     distortion2 = distortion_y + dist_uvs[uv_tx];
-    rate2 = rate_y + rate_overhead + rate_uv_intra[uv_tx];
+    rate2 = rate_y + rate_overhead_palette + rate_uv_intra[uv_tx];
     rate2 += ref_costs_single[INTRA_FRAME];
 
     if (skippable) {
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index d32ff7d..acdc13b 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -270,11 +270,6 @@
       sf->partition_search_breakout_dist_thr = (1 << 23);
     }
   }
-
-  if (speed >= 7) {
-    sf->encode_breakout_thresh =
-        (AOMMIN(cm->width, cm->height) >= 720) ? 800 : 300;
-  }
 }
 
 static void set_rt_speed_feature(AV1_COMP *cpi, SPEED_FEATURES *sf, int speed,
@@ -454,11 +449,6 @@
     sf->adaptive_pred_interp_filter = 0;
   }
 
-  if (cpi->encode_breakout && oxcf->mode == REALTIME &&
-      sf->encode_breakout_thresh > cpi->encode_breakout) {
-    cpi->encode_breakout = sf->encode_breakout_thresh;
-  }
-
   // Check for masked out split cases.
   for (i = 0; i < MAX_REFS; ++i) {
     if (sf->disable_split_mask & (1 << i)) {
@@ -542,7 +532,6 @@
   // to FIXED_PARTITION.
   sf->always_this_block_size = BLOCK_16X16;
   sf->search_type_check_frequency = 50;
-  sf->encode_breakout_thresh = 0;
   // Recode loop tolerance %.
   sf->recode_tolerance = 25;
   sf->default_interp_filter = SWITCHABLE;
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 5c34808..c6821bf 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -443,10 +443,6 @@
   // time mode speed 6.
   int reuse_inter_pred_sby;
 
-  // This variable sets the encode_breakout threshold. Currently, it is only
-  // enabled in real time mode.
-  int encode_breakout_thresh;
-
   // default interp filter choice
   InterpFilter default_interp_filter;
 
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 7707e8f..821adba 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -534,6 +534,14 @@
 
   *tp = t;
 
+#if CONFIG_ADAPT_SCAN
+  // Since dqcoeff is not available here, we pass qcoeff into
+  // av1_update_scan_count_facade(). The update behavior should be the same
+  // because av1_update_scan_count_facade() only cares if coefficients are zero
+  // or not.
+  av1_update_scan_count_facade((AV1_COMMON *)cm, tx_size, tx_type, qcoeff, c);
+#endif
+
   av1_set_contexts(xd, pd, tx_size, c > 0, blk_col, blk_row);
 }
 
diff --git a/test/av1_inv_txfm_test.cc b/test/av1_inv_txfm_test.cc
index ff358b6..84e2402 100644
--- a/test/av1_inv_txfm_test.cc
+++ b/test/av1_inv_txfm_test.cc
@@ -137,6 +137,7 @@
   InvTxfmFunc partial_itxfm_;
 };
 
+#if !CONFIG_ADAPT_SCAN
 TEST_P(AV1PartialIDctTest, RunQuantCheck) {
   int size;
   switch (tx_size_) {
@@ -256,6 +257,7 @@
   EXPECT_EQ(0, max_error)
       << "Error: partial inverse transform produces different results";
 }
+#endif
 using std::tr1::make_tuple;
 
 INSTANTIATE_TEST_CASE_P(
diff --git a/test/test.mk b/test/test.mk
index 61ea1d0..2d18f69 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -113,6 +113,7 @@
 #LIBAOM_TEST_SRCS-yes                   += encoder_parms_get_to_decoder.cc
 endif
 
+LIBAOM_TEST_SRCS-$(CONFIG_ADAPT_SCAN)  += scan_test.cc
 #LIBAOM_TEST_SRCS-yes                   += convolve_test.cc
 LIBAOM_TEST_SRCS-yes                   += lpf_8_test.cc
 LIBAOM_TEST_SRCS-$(CONFIG_CLPF)        += clpf_test.cc