Merge "Add parallel-deblocking experiment" into nextgenv2
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index 4735199..eebdc0c 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -191,6 +191,7 @@
 endif  # CONFIG_AOM_HIGHBITDEPTH
 
 DSP_SRCS-yes            += txfm_common.h
+DSP_SRCS-yes            += x86/txfm_common_intrin.h
 DSP_SRCS-$(HAVE_SSE2)   += x86/txfm_common_sse2.h
 DSP_SRCS-$(HAVE_MSA)    += mips/txfm_macros_msa.h
 # forward transform
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 6d873bc..d6fa90b 100644
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -701,6 +701,34 @@
 #
 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
   if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
+    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct4x4 sse2/;
+
+    add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct4x4_1 sse2/;
+
+    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct8x8 sse2/;
+
+    add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct8x8_1 sse2/;
+
+    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct16x16 sse2/;
+
+    add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct16x16_1 sse2 avx2/;
+
+    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct32x32 sse2 avx2/;
+
+    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct32x32_rd sse2 avx2/;
+
+    add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct32x32_1 sse2 avx2/;
+
+    # High bit depth
     add_proto qw/void aom_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
     specialize qw/aom_highbd_fdct4x4 sse2/;
 
@@ -724,33 +752,34 @@
 
     add_proto qw/void aom_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
     specialize qw/aom_highbd_fdct32x32_1/;
-  }   # CONFIG_AOM_HIGHBITDEPTH
-  add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct4x4 sse2 msa/;
+  } else {
+    add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct4x4 sse2 msa/;
 
-  add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct4x4_1 sse2/;
+    add_proto qw/void aom_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct4x4_1 sse2/;
 
-  add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
+    add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
 
-  add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct8x8_1 sse2 neon msa/;
+    add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct8x8_1 sse2 neon msa/;
 
-  add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct16x16 sse2 msa/;
+    add_proto qw/void aom_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct16x16 sse2 msa/;
 
-  add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct16x16_1 sse2 avx2 msa/;
+    add_proto qw/void aom_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct16x16_1 sse2 avx2 msa/;
 
-  add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct32x32 sse2 avx2 msa/;
+    add_proto qw/void aom_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct32x32 sse2 avx2 msa/;
 
-  add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
+    add_proto qw/void aom_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct32x32_rd sse2 avx2 msa/;
 
-  add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
-  specialize qw/aom_fdct32x32_1 sse2 avx2 msa/;
+    add_proto qw/void aom_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
+    specialize qw/aom_fdct32x32_1 sse2 avx2 msa/;
+  }  # CONFIG_AOM_HIGHBITDEPTH
 }  # CONFIG_AV1_ENCODER
 
 #
diff --git a/aom_dsp/x86/fwd_dct32x32_impl_avx2.h b/aom_dsp/x86/fwd_dct32x32_impl_avx2.h
index 8b136e7..2167395 100644
--- a/aom_dsp/x86/fwd_dct32x32_impl_avx2.h
+++ b/aom_dsp/x86/fwd_dct32x32_impl_avx2.h
@@ -12,6 +12,7 @@
 #include <immintrin.h>  // AVX2
 
 #include "aom_dsp/txfm_common.h"
+#include "aom_dsp/x86/txfm_common_intrin.h"
 #include "aom_dsp/x86/txfm_common_avx2.h"
 
 #if FDCT32x32_HIGH_PRECISION
@@ -31,7 +32,19 @@
 }
 #endif
 
-void FDCT32x32_2D_AVX2(const int16_t *input, int16_t *output_org, int stride) {
+#ifndef STORE_COEFF_FUNC  // skip if an earlier inclusion already defined it
+#define STORE_COEFF_FUNC
+static void store_coeff(const __m256i *coeff, tran_low_t *curr,
+                        tran_low_t *next) {
+  __m128i u = _mm256_castsi256_si128(*coeff);  // low 128 bits -> curr
+  storeu_output(&u, curr);
+  u = _mm256_extractf128_si256(*coeff, 1);  // high 128 bits -> next
+  storeu_output(&u, next);
+}
+#endif  // STORE_COEFF_FUNC
+
+void FDCT32x32_2D_AVX2(const int16_t *input, tran_low_t *output_org,
+                       int stride) {
   // Calculate pre-multiplied strides
   const int str1 = stride;
   const int str2 = 2 * stride;
@@ -2842,13 +2855,14 @@
       {
         int transpose_block;
         int16_t *output_currStep, *output_nextStep;
-        if (0 == pass) {
-          output_currStep = &intermediate[column_start * 32];
-          output_nextStep = &intermediate[(column_start + 8) * 32];
-        } else {
-          output_currStep = &output_org[column_start * 32];
-          output_nextStep = &output_org[(column_start + 8) * 32];
-        }
+        tran_low_t *curr_out, *next_out;
+        // Pass 0
+        output_currStep = &intermediate[column_start * 32];
+        output_nextStep = &intermediate[(column_start + 8) * 32];
+        // Pass 1
+        curr_out = &output_org[column_start * 32];
+        next_out = &output_org[(column_start + 8) * 32];
+
         for (transpose_block = 0; transpose_block < 4; ++transpose_block) {
           __m256i *this_out = &out[8 * transpose_block];
           // 00  01  02  03  04  05  06  07  08  09  10  11  12  13  14  15
@@ -2948,44 +2962,58 @@
             tr2_6 = _mm256_srai_epi16(tr2_6, 2);
             tr2_7 = _mm256_srai_epi16(tr2_7, 2);
           }
-          // Note: even though all these stores are aligned, using the aligned
-          //       intrinsic make the code slightly slower.
-          _mm_storeu_si128((__m128i *)(output_currStep + 0 * 32),
-                           _mm256_castsi256_si128(tr2_0));
-          _mm_storeu_si128((__m128i *)(output_currStep + 1 * 32),
-                           _mm256_castsi256_si128(tr2_1));
-          _mm_storeu_si128((__m128i *)(output_currStep + 2 * 32),
-                           _mm256_castsi256_si128(tr2_2));
-          _mm_storeu_si128((__m128i *)(output_currStep + 3 * 32),
-                           _mm256_castsi256_si128(tr2_3));
-          _mm_storeu_si128((__m128i *)(output_currStep + 4 * 32),
-                           _mm256_castsi256_si128(tr2_4));
-          _mm_storeu_si128((__m128i *)(output_currStep + 5 * 32),
-                           _mm256_castsi256_si128(tr2_5));
-          _mm_storeu_si128((__m128i *)(output_currStep + 6 * 32),
-                           _mm256_castsi256_si128(tr2_6));
-          _mm_storeu_si128((__m128i *)(output_currStep + 7 * 32),
-                           _mm256_castsi256_si128(tr2_7));
+          if (0 == pass) {
+            // Note: even though all these stores are aligned, using the aligned
+            //       intrinsic make the code slightly slower.
+            _mm_storeu_si128((__m128i *)(output_currStep + 0 * 32),
+                             _mm256_castsi256_si128(tr2_0));
+            _mm_storeu_si128((__m128i *)(output_currStep + 1 * 32),
+                             _mm256_castsi256_si128(tr2_1));
+            _mm_storeu_si128((__m128i *)(output_currStep + 2 * 32),
+                             _mm256_castsi256_si128(tr2_2));
+            _mm_storeu_si128((__m128i *)(output_currStep + 3 * 32),
+                             _mm256_castsi256_si128(tr2_3));
+            _mm_storeu_si128((__m128i *)(output_currStep + 4 * 32),
+                             _mm256_castsi256_si128(tr2_4));
+            _mm_storeu_si128((__m128i *)(output_currStep + 5 * 32),
+                             _mm256_castsi256_si128(tr2_5));
+            _mm_storeu_si128((__m128i *)(output_currStep + 6 * 32),
+                             _mm256_castsi256_si128(tr2_6));
+            _mm_storeu_si128((__m128i *)(output_currStep + 7 * 32),
+                             _mm256_castsi256_si128(tr2_7));
 
-          _mm_storeu_si128((__m128i *)(output_nextStep + 0 * 32),
-                           _mm256_extractf128_si256(tr2_0, 1));
-          _mm_storeu_si128((__m128i *)(output_nextStep + 1 * 32),
-                           _mm256_extractf128_si256(tr2_1, 1));
-          _mm_storeu_si128((__m128i *)(output_nextStep + 2 * 32),
-                           _mm256_extractf128_si256(tr2_2, 1));
-          _mm_storeu_si128((__m128i *)(output_nextStep + 3 * 32),
-                           _mm256_extractf128_si256(tr2_3, 1));
-          _mm_storeu_si128((__m128i *)(output_nextStep + 4 * 32),
-                           _mm256_extractf128_si256(tr2_4, 1));
-          _mm_storeu_si128((__m128i *)(output_nextStep + 5 * 32),
-                           _mm256_extractf128_si256(tr2_5, 1));
-          _mm_storeu_si128((__m128i *)(output_nextStep + 6 * 32),
-                           _mm256_extractf128_si256(tr2_6, 1));
-          _mm_storeu_si128((__m128i *)(output_nextStep + 7 * 32),
-                           _mm256_extractf128_si256(tr2_7, 1));
-          // Process next 8x8
-          output_currStep += 8;
-          output_nextStep += 8;
+            _mm_storeu_si128((__m128i *)(output_nextStep + 0 * 32),
+                             _mm256_extractf128_si256(tr2_0, 1));
+            _mm_storeu_si128((__m128i *)(output_nextStep + 1 * 32),
+                             _mm256_extractf128_si256(tr2_1, 1));
+            _mm_storeu_si128((__m128i *)(output_nextStep + 2 * 32),
+                             _mm256_extractf128_si256(tr2_2, 1));
+            _mm_storeu_si128((__m128i *)(output_nextStep + 3 * 32),
+                             _mm256_extractf128_si256(tr2_3, 1));
+            _mm_storeu_si128((__m128i *)(output_nextStep + 4 * 32),
+                             _mm256_extractf128_si256(tr2_4, 1));
+            _mm_storeu_si128((__m128i *)(output_nextStep + 5 * 32),
+                             _mm256_extractf128_si256(tr2_5, 1));
+            _mm_storeu_si128((__m128i *)(output_nextStep + 6 * 32),
+                             _mm256_extractf128_si256(tr2_6, 1));
+            _mm_storeu_si128((__m128i *)(output_nextStep + 7 * 32),
+                             _mm256_extractf128_si256(tr2_7, 1));
+            // Process next 8x8
+            output_currStep += 8;
+            output_nextStep += 8;
+          }
+          if (1 == pass) {
+            store_coeff(&tr2_0, curr_out + 0 * 32, next_out + 0 * 32);
+            store_coeff(&tr2_1, curr_out + 1 * 32, next_out + 1 * 32);
+            store_coeff(&tr2_2, curr_out + 2 * 32, next_out + 2 * 32);
+            store_coeff(&tr2_3, curr_out + 3 * 32, next_out + 3 * 32);
+            store_coeff(&tr2_4, curr_out + 4 * 32, next_out + 4 * 32);
+            store_coeff(&tr2_5, curr_out + 5 * 32, next_out + 5 * 32);
+            store_coeff(&tr2_6, curr_out + 6 * 32, next_out + 6 * 32);
+            store_coeff(&tr2_7, curr_out + 7 * 32, next_out + 7 * 32);
+            curr_out += 8;
+            next_out += 8;
+          }
         }
       }
     }
diff --git a/aom_dsp/x86/fwd_txfm_avx2.c b/aom_dsp/x86/fwd_txfm_avx2.c
index d381a6e..670f864 100644
--- a/aom_dsp/x86/fwd_txfm_avx2.c
+++ b/aom_dsp/x86/fwd_txfm_avx2.c
@@ -17,14 +17,6 @@
 #undef FDCT32x32_2D_AVX2
 #undef FDCT32x32_HIGH_PRECISION
 
-// TODO(luoyi): The following macro hides an error. The second parameter type of
-// function,
-//   void FDCT32x32_2D_AVX2(const int16_t *, int16_t*, int);
-// is different from the one in,
-//   void aom_fdct32x32_avx2(const int16_t *, tran_low_t*, int);
-// In CONFIG_AOM_HIGHBITDEPTH=1 build, the second parameter type should be
-// int32_t.
-// This function should be removed after av1_fht32x32 scaling/rounding fix.
 #define FDCT32x32_2D_AVX2 aom_fdct32x32_avx2
 #define FDCT32x32_HIGH_PRECISION 1
 #include "aom_dsp/x86/fwd_dct32x32_impl_avx2.h"  // NOLINT
diff --git a/aom_dsp/x86/fwd_txfm_sse2.h b/aom_dsp/x86/fwd_txfm_sse2.h
index 3261584..fe3e446 100644
--- a/aom_dsp/x86/fwd_txfm_sse2.h
+++ b/aom_dsp/x86/fwd_txfm_sse2.h
@@ -12,6 +12,8 @@
 #ifndef AOM_DSP_X86_FWD_TXFM_SSE2_H_
 #define AOM_DSP_X86_FWD_TXFM_SSE2_H_
 
+#include "aom_dsp/x86/txfm_common_intrin.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -257,19 +259,6 @@
 #endif  // CONFIG_AOM_HIGHBITDEPTH
 }
 
-static INLINE void storeu_output(const __m128i *poutput, tran_low_t *dst_ptr) {
-#if CONFIG_AOM_HIGHBITDEPTH
-  const __m128i zero = _mm_setzero_si128();
-  const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
-  __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
-  __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
-  _mm_storeu_si128((__m128i *)(dst_ptr), out0);
-  _mm_storeu_si128((__m128i *)(dst_ptr + 4), out1);
-#else
-  _mm_storeu_si128((__m128i *)(dst_ptr), *poutput);
-#endif  // CONFIG_AOM_HIGHBITDEPTH
-}
-
 static INLINE __m128i mult_round_shift(const __m128i *pin0, const __m128i *pin1,
                                        const __m128i *pmultiplier,
                                        const __m128i *prounding,
diff --git a/aom_dsp/x86/txfm_common_intrin.h b/aom_dsp/x86/txfm_common_intrin.h
new file mode 100644
index 0000000..890e048
--- /dev/null
+++ b/aom_dsp/x86/txfm_common_intrin.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_DSP_X86_TXFM_COMMON_INTRIN_H_
+#define AOM_DSP_X86_TXFM_COMMON_INTRIN_H_
+
+// Note:
+//  This header includes nothing itself. Include it below an x86 intrinsics
+//  header file: it uses __m128i and the SSE2 _mm_* intrinsics.
+
+// Stores the eight 16-bit values in *poutput to dst_ptr (unaligned store).
+// With CONFIG_AOM_HIGHBITDEPTH each value is sign-extended to 32 bits, so
+// eight 32-bit elements are written; otherwise the 128 bits are stored as is.
+static INLINE void storeu_output(const __m128i *poutput, tran_low_t *dst_ptr) {
+#if CONFIG_AOM_HIGHBITDEPTH
+  const __m128i zero = _mm_setzero_si128();
+  // All-ones 16-bit mask for negative lanes: it becomes the upper half of
+  // each widened value when interleaved below.
+  const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero);
+  __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits);
+  __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits);
+  _mm_storeu_si128((__m128i *)(dst_ptr), out0);
+  _mm_storeu_si128((__m128i *)(dst_ptr + 4), out1);
+#else
+  _mm_storeu_si128((__m128i *)(dst_ptr), *poutput);
+#endif  // CONFIG_AOM_HIGHBITDEPTH
+}
+
+#endif  // AOM_DSP_X86_TXFM_COMMON_INTRIN_H_
diff --git a/av1/common/av1_convolve.c b/av1/common/av1_convolve.c
index 1f8d623..270ab70 100644
--- a/av1/common/av1_convolve.c
+++ b/av1/common/av1_convolve.c
@@ -1,3 +1,14 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
 #include <assert.h>
 #include <string.h>
 
diff --git a/av1/common/av1_convolve.h b/av1/common/av1_convolve.h
index 804c102..dafa032 100644
--- a/av1/common/av1_convolve.h
+++ b/av1/common/av1_convolve.h
@@ -1,3 +1,14 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
 #ifndef AV1_COMMON_AV1_CONVOLVE_H_
 #define AV1_COMMON_AV1_CONVOLVE_H_
 #include "av1/common/filter.h"
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 8d7c7f8..65f7440 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -712,6 +712,14 @@
 
 void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
 
+static INLINE int tx_size_to_depth(const TX_SIZE tx_size) {
+  return (int)(tx_size - TX_4X4);  // offset of tx_size from TX_4X4
+}
+
+static INLINE TX_SIZE depth_to_tx_size(const int depth) {
+  return (TX_SIZE)(depth + TX_4X4);  // inverse of tx_size_to_depth()
+}
+
 static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
                                      const struct macroblockd_plane *pd) {
   TX_SIZE uv_txsize;
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index 9d851e2..3acb58e 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -68,18 +68,27 @@
 };
 
 static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  1,
+#endif
   1, 4, 16, 64,
 #if CONFIG_EXT_TX
   2, 2, 8,  8,  32, 32
 #endif  // CONFIG_EXT_TX
 };
 static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  1,
+#endif
   1, 2, 4, 8,
 #if CONFIG_EXT_TX
   1, 2, 2, 4, 4, 8
 #endif  // CONFIG_EXT_TX
 };
 static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  1,
+#endif
   1, 2, 4, 8,
 #if CONFIG_EXT_TX
   2, 1, 4, 2, 8, 4
@@ -87,18 +96,27 @@
 };
 
 static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  0,
+#endif
   0, 2, 4, 6,
 #if CONFIG_EXT_TX
   1, 1, 3, 3, 5, 5
 #endif  // CONFIG_EXT_TX
 };
 static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  0,
+#endif
   0, 1, 2, 3,
 #if CONFIG_EXT_TX
   0, 1, 1, 2, 2, 3
 #endif  // CONFIG_EXT_TX
 };
 static const uint8_t num_4x4_blocks_high_txsize_log2_lookup[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  0,
+#endif
   0, 1, 2, 3,
 #if CONFIG_EXT_TX
   1, 0, 2, 1, 3, 2
@@ -419,6 +437,9 @@
 /* clang-format on */
 
 static const TX_SIZE txsize_horz_map[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  TX_2X2,  // TX_2X2
+#endif
   TX_4X4,    // TX_4X4
   TX_8X8,    // TX_8X8
   TX_16X16,  // TX_16X16
@@ -434,6 +455,9 @@
 };
 
 static const TX_SIZE txsize_vert_map[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  TX_2X2,  // TX_2X2
+#endif
   TX_4X4,    // TX_4X4
   TX_8X8,    // TX_8X8
   TX_16X16,  // TX_16X16
@@ -450,6 +474,9 @@
 
 // Transform block width in pixels
 static const int tx_size_wide[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  2,
+#endif
   4, 8, 16, 32,
 #if CONFIG_EXT_TX
   4, 8, 8,  16, 16, 32,
@@ -458,6 +485,9 @@
 
 // Transform block height in pixels
 static const int tx_size_high[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  2,
+#endif
   4, 8, 16, 32,
 #if CONFIG_EXT_TX
   8, 4, 16, 8,  32, 16,
@@ -466,6 +496,9 @@
 
 // Transform block width in unit
 static const int tx_size_wide_unit[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  1,
+#endif
   1, 2, 4, 8,
 #if CONFIG_EXT_TX
   1, 2, 2, 4, 4, 8,
@@ -474,6 +507,9 @@
 
 // Transform block height in unit
 static const int tx_size_high_unit[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  1,
+#endif
   1, 2, 4, 8,
 #if CONFIG_EXT_TX
   2, 1, 4, 2, 8, 4,
@@ -482,6 +518,9 @@
 
 // Transform block width in log2
 static const int tx_size_wide_log2[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  2,
+#endif
   2, 3, 4, 5,
 #if CONFIG_EXT_TX
   2, 3, 3, 4, 4, 5,
@@ -490,6 +529,9 @@
 
 // Transform block height in log2
 static const int tx_size_high_log2[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  2,
+#endif
   2, 3, 4, 5,
 #if CONFIG_EXT_TX
   3, 2, 4, 3, 5, 4,
@@ -497,6 +539,9 @@
 };
 
 static const int tx_size_2d[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  4,
+#endif
   16, 64, 256, 1024,
 #if CONFIG_EXT_TX
   32, 32, 128, 128,  512, 512,
@@ -509,6 +554,9 @@
 static const int tx_size_1d_in_unit_log2[TX_SIZES] = { 0, 1, 2, 3 };
 
 static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  BLOCK_4X4,  // TX_2X2
+#endif
   BLOCK_4X4,    // TX_4X4
   BLOCK_8X8,    // TX_8X8
   BLOCK_16X16,  // TX_16X16
@@ -524,6 +572,9 @@
 };
 
 static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  TX_2X2,  // TX_2X2
+#endif
   TX_4X4,    // TX_4X4
   TX_8X8,    // TX_8X8
   TX_16X16,  // TX_16X16
@@ -539,6 +590,9 @@
 };
 
 static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
+#if CONFIG_CB4X4
+  TX_2X2,  // TX_2X2
+#endif
   TX_4X4,    // TX_4X4
   TX_8X8,    // TX_8X8
   TX_16X16,  // TX_16X16
@@ -589,7 +643,10 @@
   //  ss_x == 0    ss_x == 0        ss_x == 1      ss_x == 1
   //  ss_y == 0    ss_y == 1        ss_y == 0      ss_y == 1
   {
-      // BLOCK_4X4
+// BLOCK_4X4
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -604,7 +661,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_4X8
+// BLOCK_4X8
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -623,7 +683,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_8X4
+// BLOCK_8X4
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
@@ -642,7 +705,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_8X8
+// BLOCK_8X8
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
@@ -657,7 +723,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_8X16
+// BLOCK_8X16
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } },
@@ -676,7 +745,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_16X8
+// BLOCK_16X8
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } },
       { { TX_8X8, TX_4X4 }, { TX_8X8, TX_8X8 } },
@@ -695,7 +767,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_16X16
+// BLOCK_16X16
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
@@ -710,7 +785,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_16X32
+// BLOCK_16X32
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } },
@@ -729,7 +807,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_32X16
+// BLOCK_32X16
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } },
@@ -748,7 +829,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_32X32
+// BLOCK_32X32
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -763,7 +847,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_32X64
+// BLOCK_32X64
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -778,7 +865,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_64X32
+// BLOCK_64X32
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -793,7 +883,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_64X64
+// BLOCK_64X64
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -801,7 +894,10 @@
 #if CONFIG_EXT_PARTITION
   },
   {
-      // BLOCK_64X128
+// BLOCK_64X128
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -816,7 +912,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_128X64
+// BLOCK_128X64
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -831,7 +930,10 @@
 #endif  // CONFIG_EXT_TX
   },
   {
-      // BLOCK_128X128
+// BLOCK_128X128
+#if CONFIG_CB4X4
+      { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
       { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
       { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } },
       { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } },
@@ -891,8 +993,11 @@
 
 #if CONFIG_SUPERTX
 static const TX_SIZE uvsupertx_size_lookup[TX_SIZES][2][2] = {
-  //  ss_x == 0 ss_x == 0   ss_x == 1 ss_x == 1
-  //  ss_y == 0 ss_y == 1   ss_y == 0 ss_y == 1
+//  ss_x == 0 ss_x == 0   ss_x == 1 ss_x == 1
+//  ss_y == 0 ss_y == 1   ss_y == 0 ss_y == 1
+#if CONFIG_CB4X4
+  { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
+#endif
   { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } },
   { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } },
   { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } },
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 870632d..049c6ef 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -58,6 +58,9 @@
 #endif
 
 const uint16_t band_count_table[TX_SIZES_ALL][8] = {
+#if CONFIG_CB4X4
+  { 1, 2, 2, 3, 0, 0, 0 },
+#endif
   { 1, 2, 3, 4, 3, 16 - 13, 0 },   { 1, 2, 3, 4, 11, 64 - 21, 0 },
   { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 },
 #if CONFIG_EXT_TX
@@ -68,6 +71,9 @@
 };
 
 const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
+#if CONFIG_CB4X4
+  { 0, 1, 3, 6, 10, 13, 16, 0 },
+#endif
   { 0, 1, 3, 6, 10, 13, 16, 0 },  { 0, 1, 3, 6, 10, 21, 64, 0 },
   { 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 },
 #if CONFIG_EXT_TX
@@ -2833,6 +2839,9 @@
       ROUND_POWER_OF_TWO(cm->base_qindex, 8 - QCTX_BIN_BITS), QCTX_BINS - 1);
   av1_copy(cm->fc->coef_probs, default_qctx_coef_probs[index]);
 #else
+#if CONFIG_CB4X4
+  av1_copy(cm->fc->coef_probs[TX_2X2], default_coef_probs_4x4);
+#endif
   av1_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4);
   av1_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8);
   av1_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16);
@@ -2913,7 +2922,7 @@
     count_sat = COEF_COUNT_SAT;
   }
 #endif  // CONFIG_ENTROPY
-  for (tx_size = TX_4X4; tx_size <= TX_32X32; tx_size++)
+  for (tx_size = 0; tx_size < TX_SIZES; tx_size++)
     adapt_coef_probs(cm, tx_size, count_sat, update_factor);
 #if CONFIG_RANS
   av1_coef_pareto_cdfs(cm->fc);
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index e25dcf8..52dc8f1 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -567,117 +567,117 @@
                                     [PALETTE_COLORS - 1] = {
                                       {
                                           // 2 colors
-                                          { 230, 255, 128, 128, 128, 128, 128 },
-                                          { 214, 255, 128, 128, 128, 128, 128 },
-                                          { 128, 128, 128, 128, 128, 128, 128 },
-                                          { 128, 128, 128, 128, 128, 128, 128 },
-                                          { 128, 128, 128, 128, 128, 128, 128 },
-                                          { 240, 255, 128, 128, 128, 128, 128 },
-                                          { 73, 255, 128, 128, 128, 128, 128 },
-                                          { 128, 128, 128, 128, 128, 128, 128 },
-                                          { 130, 255, 128, 128, 128, 128, 128 },
-                                          { 227, 255, 128, 128, 128, 128, 128 },
-                                          { 128, 128, 128, 128, 128, 128, 128 },
-                                          { 188, 255, 128, 128, 128, 128, 128 },
-                                          { 75, 255, 128, 128, 128, 128, 128 },
-                                          { 250, 255, 128, 128, 128, 128, 128 },
-                                          { 223, 255, 128, 128, 128, 128, 128 },
-                                          { 252, 255, 128, 128, 128, 128, 128 },
+                                          { 230, 0, 0, 0, 0, 0, 0 },
+                                          { 214, 0, 0, 0, 0, 0, 0 },
+                                          { 0, 0, 0, 0, 0, 0, 0 },
+                                          { 0, 0, 0, 0, 0, 0, 0 },
+                                          { 0, 0, 0, 0, 0, 0, 0 },
+                                          { 240, 0, 0, 0, 0, 0, 0 },
+                                          { 73, 0, 0, 0, 0, 0, 0 },
+                                          { 0, 0, 0, 0, 0, 0, 0 },
+                                          { 130, 0, 0, 0, 0, 0, 0 },
+                                          { 227, 0, 0, 0, 0, 0, 0 },
+                                          { 0, 0, 0, 0, 0, 0, 0 },
+                                          { 188, 0, 0, 0, 0, 0, 0 },
+                                          { 75, 0, 0, 0, 0, 0, 0 },
+                                          { 250, 0, 0, 0, 0, 0, 0 },
+                                          { 223, 0, 0, 0, 0, 0, 0 },
+                                          { 252, 0, 0, 0, 0, 0, 0 },
                                       },
                                       {
                                           // 3 colors
-                                          { 229, 137, 255, 128, 128, 128, 128 },
-                                          { 197, 120, 255, 128, 128, 128, 128 },
-                                          { 107, 195, 255, 128, 128, 128, 128 },
-                                          { 128, 128, 128, 128, 128, 128, 128 },
-                                          { 27, 151, 255, 128, 128, 128, 128 },
-                                          { 230, 130, 255, 128, 128, 128, 128 },
-                                          { 37, 230, 255, 128, 128, 128, 128 },
-                                          { 67, 221, 255, 128, 128, 128, 128 },
-                                          { 124, 230, 255, 128, 128, 128, 128 },
-                                          { 195, 109, 255, 128, 128, 128, 128 },
-                                          { 99, 122, 255, 128, 128, 128, 128 },
-                                          { 205, 208, 255, 128, 128, 128, 128 },
-                                          { 40, 235, 255, 128, 128, 128, 128 },
-                                          { 251, 132, 255, 128, 128, 128, 128 },
-                                          { 237, 186, 255, 128, 128, 128, 128 },
-                                          { 253, 112, 255, 128, 128, 128, 128 },
+                                          { 229, 137, 0, 0, 0, 0, 0 },
+                                          { 197, 120, 0, 0, 0, 0, 0 },
+                                          { 107, 195, 0, 0, 0, 0, 0 },
+                                          { 0, 0, 0, 0, 0, 0, 0 },
+                                          { 27, 151, 0, 0, 0, 0, 0 },
+                                          { 230, 130, 0, 0, 0, 0, 0 },
+                                          { 37, 230, 0, 0, 0, 0, 0 },
+                                          { 67, 221, 0, 0, 0, 0, 0 },
+                                          { 124, 230, 0, 0, 0, 0, 0 },
+                                          { 195, 109, 0, 0, 0, 0, 0 },
+                                          { 99, 122, 0, 0, 0, 0, 0 },
+                                          { 205, 208, 0, 0, 0, 0, 0 },
+                                          { 40, 235, 0, 0, 0, 0, 0 },
+                                          { 251, 132, 0, 0, 0, 0, 0 },
+                                          { 237, 186, 0, 0, 0, 0, 0 },
+                                          { 253, 112, 0, 0, 0, 0, 0 },
                                       },
                                       {
                                           // 4 colors
-                                          { 195, 87, 128, 255, 128, 128, 128 },
-                                          { 143, 100, 123, 255, 128, 128, 128 },
-                                          { 94, 124, 119, 255, 128, 128, 128 },
-                                          { 77, 91, 130, 255, 128, 128, 128 },
-                                          { 39, 114, 178, 255, 128, 128, 128 },
-                                          { 222, 94, 125, 255, 128, 128, 128 },
-                                          { 44, 203, 132, 255, 128, 128, 128 },
-                                          { 68, 175, 122, 255, 128, 128, 128 },
-                                          { 110, 187, 124, 255, 128, 128, 128 },
-                                          { 152, 91, 128, 255, 128, 128, 128 },
-                                          { 70, 109, 181, 255, 128, 128, 128 },
-                                          { 133, 113, 164, 255, 128, 128, 128 },
-                                          { 47, 205, 133, 255, 128, 128, 128 },
-                                          { 247, 94, 136, 255, 128, 128, 128 },
-                                          { 205, 122, 146, 255, 128, 128, 128 },
-                                          { 251, 100, 141, 255, 128, 128, 128 },
+                                          { 195, 87, 128, 0, 0, 0, 0 },
+                                          { 143, 100, 123, 0, 0, 0, 0 },
+                                          { 94, 124, 119, 0, 0, 0, 0 },
+                                          { 77, 91, 130, 0, 0, 0, 0 },
+                                          { 39, 114, 178, 0, 0, 0, 0 },
+                                          { 222, 94, 125, 0, 0, 0, 0 },
+                                          { 44, 203, 132, 0, 0, 0, 0 },
+                                          { 68, 175, 122, 0, 0, 0, 0 },
+                                          { 110, 187, 124, 0, 0, 0, 0 },
+                                          { 152, 91, 128, 0, 0, 0, 0 },
+                                          { 70, 109, 181, 0, 0, 0, 0 },
+                                          { 133, 113, 164, 0, 0, 0, 0 },
+                                          { 47, 205, 133, 0, 0, 0, 0 },
+                                          { 247, 94, 136, 0, 0, 0, 0 },
+                                          { 205, 122, 146, 0, 0, 0, 0 },
+                                          { 251, 100, 141, 0, 0, 0, 0 },
                                       },
                                       {
                                           // 5 colors
-                                          { 195, 65, 84, 125, 255, 128, 128 },
-                                          { 150, 76, 84, 121, 255, 128, 128 },
-                                          { 94, 110, 81, 117, 255, 128, 128 },
-                                          { 79, 85, 91, 139, 255, 128, 128 },
-                                          { 26, 102, 139, 127, 255, 128, 128 },
-                                          { 220, 73, 91, 119, 255, 128, 128 },
-                                          { 38, 203, 86, 127, 255, 128, 128 },
-                                          { 61, 186, 72, 124, 255, 128, 128 },
-                                          { 132, 199, 84, 128, 255, 128, 128 },
-                                          { 172, 52, 62, 120, 255, 128, 128 },
-                                          { 102, 89, 121, 122, 255, 128, 128 },
-                                          { 182, 48, 69, 186, 255, 128, 128 },
-                                          { 36, 206, 87, 126, 255, 128, 128 },
-                                          { 249, 55, 67, 122, 255, 128, 128 },
-                                          { 218, 88, 75, 122, 255, 128, 128 },
-                                          { 253, 64, 80, 119, 255, 128, 128 },
+                                          { 195, 65, 84, 125, 0, 0, 0 },
+                                          { 150, 76, 84, 121, 0, 0, 0 },
+                                          { 94, 110, 81, 117, 0, 0, 0 },
+                                          { 79, 85, 91, 139, 0, 0, 0 },
+                                          { 26, 102, 139, 127, 0, 0, 0 },
+                                          { 220, 73, 91, 119, 0, 0, 0 },
+                                          { 38, 203, 86, 127, 0, 0, 0 },
+                                          { 61, 186, 72, 124, 0, 0, 0 },
+                                          { 132, 199, 84, 128, 0, 0, 0 },
+                                          { 172, 52, 62, 120, 0, 0, 0 },
+                                          { 102, 89, 121, 122, 0, 0, 0 },
+                                          { 182, 48, 69, 186, 0, 0, 0 },
+                                          { 36, 206, 87, 126, 0, 0, 0 },
+                                          { 249, 55, 67, 122, 0, 0, 0 },
+                                          { 218, 88, 75, 122, 0, 0, 0 },
+                                          { 253, 64, 80, 119, 0, 0, 0 },
                                       },
                                       {
                                           // 6 colors
-                                          { 182, 54, 64, 75, 118, 255, 128 },
-                                          { 126, 67, 70, 76, 116, 255, 128 },
-                                          { 79, 92, 67, 85, 120, 255, 128 },
-                                          { 63, 61, 81, 118, 132, 255, 128 },
-                                          { 21, 80, 105, 83, 119, 255, 128 },
-                                          { 215, 72, 74, 74, 111, 255, 128 },
-                                          { 50, 176, 63, 79, 120, 255, 128 },
-                                          { 72, 148, 66, 77, 120, 255, 128 },
-                                          { 105, 177, 57, 78, 130, 255, 128 },
-                                          { 150, 66, 66, 80, 127, 255, 128 },
-                                          { 81, 76, 109, 85, 116, 255, 128 },
-                                          { 113, 81, 62, 96, 148, 255, 128 },
-                                          { 54, 179, 69, 82, 121, 255, 128 },
-                                          { 244, 47, 48, 67, 118, 255, 128 },
-                                          { 198, 83, 53, 65, 121, 255, 128 },
-                                          { 250, 42, 51, 69, 110, 255, 128 },
+                                          { 182, 54, 64, 75, 118, 0, 0 },
+                                          { 126, 67, 70, 76, 116, 0, 0 },
+                                          { 79, 92, 67, 85, 120, 0, 0 },
+                                          { 63, 61, 81, 118, 132, 0, 0 },
+                                          { 21, 80, 105, 83, 119, 0, 0 },
+                                          { 215, 72, 74, 74, 111, 0, 0 },
+                                          { 50, 176, 63, 79, 120, 0, 0 },
+                                          { 72, 148, 66, 77, 120, 0, 0 },
+                                          { 105, 177, 57, 78, 130, 0, 0 },
+                                          { 150, 66, 66, 80, 127, 0, 0 },
+                                          { 81, 76, 109, 85, 116, 0, 0 },
+                                          { 113, 81, 62, 96, 148, 0, 0 },
+                                          { 54, 179, 69, 82, 121, 0, 0 },
+                                          { 244, 47, 48, 67, 118, 0, 0 },
+                                          { 198, 83, 53, 65, 121, 0, 0 },
+                                          { 250, 42, 51, 69, 110, 0, 0 },
                                       },
                                       {
                                           // 7 colors
-                                          { 182, 45, 54, 62, 74, 113, 255 },
-                                          { 124, 63, 57, 62, 77, 114, 255 },
-                                          { 77, 80, 56, 66, 76, 117, 255 },
-                                          { 63, 57, 69, 98, 85, 131, 255 },
-                                          { 19, 81, 98, 63, 80, 116, 255 },
-                                          { 215, 56, 60, 63, 68, 105, 255 },
-                                          { 50, 174, 50, 60, 79, 118, 255 },
-                                          { 68, 151, 50, 58, 73, 117, 255 },
-                                          { 104, 182, 53, 57, 79, 127, 255 },
-                                          { 156, 50, 51, 63, 77, 111, 255 },
-                                          { 88, 67, 97, 59, 82, 120, 255 },
-                                          { 114, 81, 46, 65, 103, 132, 255 },
-                                          { 55, 166, 57, 66, 82, 120, 255 },
-                                          { 245, 34, 38, 43, 63, 114, 255 },
-                                          { 203, 68, 45, 47, 60, 118, 255 },
-                                          { 250, 35, 37, 47, 66, 110, 255 },
+                                          { 182, 45, 54, 62, 74, 113, 0 },
+                                          { 124, 63, 57, 62, 77, 114, 0 },
+                                          { 77, 80, 56, 66, 76, 117, 0 },
+                                          { 63, 57, 69, 98, 85, 131, 0 },
+                                          { 19, 81, 98, 63, 80, 116, 0 },
+                                          { 215, 56, 60, 63, 68, 105, 0 },
+                                          { 50, 174, 50, 60, 79, 118, 0 },
+                                          { 68, 151, 50, 58, 73, 117, 0 },
+                                          { 104, 182, 53, 57, 79, 127, 0 },
+                                          { 156, 50, 51, 63, 77, 111, 0 },
+                                          { 88, 67, 97, 59, 82, 120, 0 },
+                                          { 114, 81, 46, 65, 103, 132, 0 },
+                                          { 55, 166, 57, 66, 82, 120, 0 },
+                                          { 245, 34, 38, 43, 63, 114, 0 },
+                                          { 203, 68, 45, 47, 60, 118, 0 },
+                                          { 250, 35, 37, 47, 66, 110, 0 },
                                       },
                                       {
                                           // 8 colors
@@ -700,141 +700,144 @@
                                       }
                                     };
 
-const aom_prob av1_default_palette_uv_color_prob
-    [PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
-    [PALETTE_COLORS - 1] = { {
-                                 // 2 colors
-                                 { 228, 255, 128, 128, 128, 128, 128 },
-                                 { 195, 255, 128, 128, 128, 128, 128 },
-                                 { 128, 128, 128, 128, 128, 128, 128 },
-                                 { 128, 128, 128, 128, 128, 128, 128 },
-                                 { 128, 128, 128, 128, 128, 128, 128 },
-                                 { 228, 255, 128, 128, 128, 128, 128 },
-                                 { 71, 255, 128, 128, 128, 128, 128 },
-                                 { 128, 128, 128, 128, 128, 128, 128 },
-                                 { 129, 255, 128, 128, 128, 128, 128 },
-                                 { 206, 255, 128, 128, 128, 128, 128 },
-                                 { 128, 128, 128, 128, 128, 128, 128 },
-                                 { 136, 255, 128, 128, 128, 128, 128 },
-                                 { 98, 255, 128, 128, 128, 128, 128 },
-                                 { 236, 255, 128, 128, 128, 128, 128 },
-                                 { 222, 255, 128, 128, 128, 128, 128 },
-                                 { 249, 255, 128, 128, 128, 128, 128 },
-                             },
-                             {
-                                 // 3 colors
-                                 { 198, 136, 255, 128, 128, 128, 128 },
-                                 { 178, 105, 255, 128, 128, 128, 128 },
-                                 { 100, 206, 255, 128, 128, 128, 128 },
-                                 { 128, 128, 128, 128, 128, 128, 128 },
-                                 { 12, 136, 255, 128, 128, 128, 128 },
-                                 { 219, 134, 255, 128, 128, 128, 128 },
-                                 { 50, 198, 255, 128, 128, 128, 128 },
-                                 { 61, 231, 255, 128, 128, 128, 128 },
-                                 { 110, 209, 255, 128, 128, 128, 128 },
-                                 { 173, 106, 255, 128, 128, 128, 128 },
-                                 { 145, 166, 255, 128, 128, 128, 128 },
-                                 { 156, 175, 255, 128, 128, 128, 128 },
-                                 { 69, 183, 255, 128, 128, 128, 128 },
-                                 { 241, 163, 255, 128, 128, 128, 128 },
-                                 { 224, 160, 255, 128, 128, 128, 128 },
-                                 { 246, 154, 255, 128, 128, 128, 128 },
-                             },
-                             {
-                                 // 4 colors
-                                 { 173, 88, 143, 255, 128, 128, 128 },
-                                 { 146, 81, 127, 255, 128, 128, 128 },
-                                 { 84, 134, 102, 255, 128, 128, 128 },
-                                 { 69, 138, 140, 255, 128, 128, 128 },
-                                 { 31, 103, 200, 255, 128, 128, 128 },
-                                 { 217, 101, 139, 255, 128, 128, 128 },
-                                 { 51, 174, 121, 255, 128, 128, 128 },
-                                 { 64, 177, 109, 255, 128, 128, 128 },
-                                 { 96, 179, 145, 255, 128, 128, 128 },
-                                 { 164, 77, 114, 255, 128, 128, 128 },
-                                 { 87, 94, 156, 255, 128, 128, 128 },
-                                 { 105, 57, 173, 255, 128, 128, 128 },
-                                 { 63, 158, 137, 255, 128, 128, 128 },
-                                 { 236, 102, 156, 255, 128, 128, 128 },
-                                 { 197, 115, 153, 255, 128, 128, 128 },
-                                 { 245, 106, 154, 255, 128, 128, 128 },
-                             },
-                             {
-                                 // 5 colors
-                                 { 179, 64, 97, 129, 255, 128, 128 },
-                                 { 137, 56, 88, 125, 255, 128, 128 },
-                                 { 82, 107, 61, 118, 255, 128, 128 },
-                                 { 59, 113, 86, 115, 255, 128, 128 },
-                                 { 23, 88, 118, 130, 255, 128, 128 },
-                                 { 213, 66, 90, 125, 255, 128, 128 },
-                                 { 37, 181, 103, 121, 255, 128, 128 },
-                                 { 47, 188, 61, 131, 255, 128, 128 },
-                                 { 104, 185, 103, 144, 255, 128, 128 },
-                                 { 163, 39, 76, 112, 255, 128, 128 },
-                                 { 94, 74, 131, 126, 255, 128, 128 },
-                                 { 142, 42, 103, 163, 255, 128, 128 },
-                                 { 53, 162, 99, 149, 255, 128, 128 },
-                                 { 239, 54, 84, 108, 255, 128, 128 },
-                                 { 203, 84, 110, 147, 255, 128, 128 },
-                                 { 248, 70, 105, 151, 255, 128, 128 },
-                             },
-                             {
-                                 // 6 colors
-                                 { 189, 50, 67, 90, 130, 255, 128 },
-                                 { 114, 50, 55, 90, 123, 255, 128 },
-                                 { 66, 76, 54, 82, 128, 255, 128 },
-                                 { 43, 69, 69, 80, 129, 255, 128 },
-                                 { 22, 59, 87, 88, 141, 255, 128 },
-                                 { 203, 49, 68, 87, 122, 255, 128 },
-                                 { 43, 157, 74, 104, 146, 255, 128 },
-                                 { 54, 138, 51, 95, 138, 255, 128 },
-                                 { 82, 171, 58, 102, 146, 255, 128 },
-                                 { 129, 38, 59, 64, 168, 255, 128 },
-                                 { 56, 67, 119, 92, 112, 255, 128 },
-                                 { 96, 62, 53, 132, 82, 255, 128 },
-                                 { 60, 147, 77, 108, 145, 255, 128 },
-                                 { 238, 76, 73, 93, 148, 255, 128 },
-                                 { 189, 86, 73, 103, 157, 255, 128 },
-                                 { 246, 62, 75, 83, 167, 255, 128 },
-                             },
-                             {
-                                 // 7 colors
-                                 { 179, 42, 51, 73, 99, 134, 255 },
-                                 { 119, 52, 52, 61, 64, 114, 255 },
-                                 { 53, 77, 35, 65, 71, 131, 255 },
-                                 { 38, 70, 51, 68, 89, 144, 255 },
-                                 { 23, 65, 128, 73, 97, 131, 255 },
-                                 { 210, 47, 52, 63, 81, 143, 255 },
-                                 { 42, 159, 57, 68, 98, 143, 255 },
-                                 { 49, 153, 45, 82, 93, 143, 255 },
-                                 { 81, 169, 52, 72, 113, 151, 255 },
-                                 { 136, 46, 35, 56, 75, 96, 255 },
-                                 { 57, 84, 109, 47, 107, 131, 255 },
-                                 { 128, 78, 57, 36, 128, 85, 255 },
-                                 { 54, 149, 68, 77, 94, 153, 255 },
-                                 { 243, 58, 50, 71, 81, 167, 255 },
-                                 { 189, 92, 64, 70, 121, 173, 255 },
-                                 { 248, 35, 38, 51, 82, 201, 255 },
-                             },
-                             {
-                                 // 8 colors
-                                 { 201, 40, 36, 42, 64, 92, 123 },
-                                 { 116, 43, 33, 43, 73, 102, 128 },
-                                 { 46, 77, 37, 69, 62, 78, 150 },
-                                 { 40, 65, 52, 50, 76, 89, 133 },
-                                 { 28, 48, 91, 17, 64, 77, 133 },
-                                 { 218, 43, 43, 37, 56, 72, 163 },
-                                 { 41, 155, 44, 83, 82, 129, 180 },
-                                 { 44, 141, 29, 55, 64, 89, 147 },
-                                 { 92, 166, 48, 45, 59, 126, 179 },
-                                 { 169, 35, 49, 41, 36, 99, 139 },
-                                 { 55, 77, 77, 56, 60, 75, 156 },
-                                 { 155, 81, 51, 64, 57, 182, 255 },
-                                 { 60, 134, 49, 49, 93, 128, 174 },
-                                 { 244, 98, 51, 46, 22, 73, 238 },
-                                 { 189, 70, 40, 87, 93, 79, 201 },
-                                 { 248, 54, 49, 40, 29, 42, 227 },
-                             } };
+const aom_prob
+    av1_default_palette_uv_color_prob[PALETTE_MAX_SIZE - 1]
+                                     [PALETTE_COLOR_CONTEXTS]
+                                     [PALETTE_COLORS - 1] = {
+                                       {
+                                           // 2 colors
+                                           { 228, 0, 0, 0, 0, 0, 0 },
+                                           { 195, 0, 0, 0, 0, 0, 0 },
+                                           { 0, 0, 0, 0, 0, 0, 0 },
+                                           { 0, 0, 0, 0, 0, 0, 0 },
+                                           { 0, 0, 0, 0, 0, 0, 0 },
+                                           { 228, 0, 0, 0, 0, 0, 0 },
+                                           { 71, 0, 0, 0, 0, 0, 0 },
+                                           { 0, 0, 0, 0, 0, 0, 0 },
+                                           { 129, 0, 0, 0, 0, 0, 0 },
+                                           { 206, 0, 0, 0, 0, 0, 0 },
+                                           { 0, 0, 0, 0, 0, 0, 0 },
+                                           { 136, 0, 0, 0, 0, 0, 0 },
+                                           { 98, 0, 0, 0, 0, 0, 0 },
+                                           { 236, 0, 0, 0, 0, 0, 0 },
+                                           { 222, 0, 0, 0, 0, 0, 0 },
+                                           { 249, 0, 0, 0, 0, 0, 0 },
+                                       },
+                                       {
+                                           // 3 colors
+                                           { 198, 136, 0, 0, 0, 0, 0 },
+                                           { 178, 105, 0, 0, 0, 0, 0 },
+                                           { 100, 206, 0, 0, 0, 0, 0 },
+                                           { 0, 0, 0, 0, 0, 0, 0 },
+                                           { 12, 136, 0, 0, 0, 0, 0 },
+                                           { 219, 134, 0, 0, 0, 0, 0 },
+                                           { 50, 198, 0, 0, 0, 0, 0 },
+                                           { 61, 231, 0, 0, 0, 0, 0 },
+                                           { 110, 209, 0, 0, 0, 0, 0 },
+                                           { 173, 106, 0, 0, 0, 0, 0 },
+                                           { 145, 166, 0, 0, 0, 0, 0 },
+                                           { 156, 175, 0, 0, 0, 0, 0 },
+                                           { 69, 183, 0, 0, 0, 0, 0 },
+                                           { 241, 163, 0, 0, 0, 0, 0 },
+                                           { 224, 160, 0, 0, 0, 0, 0 },
+                                           { 246, 154, 0, 0, 0, 0, 0 },
+                                       },
+                                       {
+                                           // 4 colors
+                                           { 173, 88, 143, 0, 0, 0, 0 },
+                                           { 146, 81, 127, 0, 0, 0, 0 },
+                                           { 84, 134, 102, 0, 0, 0, 0 },
+                                           { 69, 138, 140, 0, 0, 0, 0 },
+                                           { 31, 103, 200, 0, 0, 0, 0 },
+                                           { 217, 101, 139, 0, 0, 0, 0 },
+                                           { 51, 174, 121, 0, 0, 0, 0 },
+                                           { 64, 177, 109, 0, 0, 0, 0 },
+                                           { 96, 179, 145, 0, 0, 0, 0 },
+                                           { 164, 77, 114, 0, 0, 0, 0 },
+                                           { 87, 94, 156, 0, 0, 0, 0 },
+                                           { 105, 57, 173, 0, 0, 0, 0 },
+                                           { 63, 158, 137, 0, 0, 0, 0 },
+                                           { 236, 102, 156, 0, 0, 0, 0 },
+                                           { 197, 115, 153, 0, 0, 0, 0 },
+                                           { 245, 106, 154, 0, 0, 0, 0 },
+                                       },
+                                       {
+                                           // 5 colors
+                                           { 179, 64, 97, 129, 0, 0, 0 },
+                                           { 137, 56, 88, 125, 0, 0, 0 },
+                                           { 82, 107, 61, 118, 0, 0, 0 },
+                                           { 59, 113, 86, 115, 0, 0, 0 },
+                                           { 23, 88, 118, 130, 0, 0, 0 },
+                                           { 213, 66, 90, 125, 0, 0, 0 },
+                                           { 37, 181, 103, 121, 0, 0, 0 },
+                                           { 47, 188, 61, 131, 0, 0, 0 },
+                                           { 104, 185, 103, 144, 0, 0, 0 },
+                                           { 163, 39, 76, 112, 0, 0, 0 },
+                                           { 94, 74, 131, 126, 0, 0, 0 },
+                                           { 142, 42, 103, 163, 0, 0, 0 },
+                                           { 53, 162, 99, 149, 0, 0, 0 },
+                                           { 239, 54, 84, 108, 0, 0, 0 },
+                                           { 203, 84, 110, 147, 0, 0, 0 },
+                                           { 248, 70, 105, 151, 0, 0, 0 },
+                                       },
+                                       {
+                                           // 6 colors
+                                           { 189, 50, 67, 90, 130, 0, 0 },
+                                           { 114, 50, 55, 90, 123, 0, 0 },
+                                           { 66, 76, 54, 82, 128, 0, 0 },
+                                           { 43, 69, 69, 80, 129, 0, 0 },
+                                           { 22, 59, 87, 88, 141, 0, 0 },
+                                           { 203, 49, 68, 87, 122, 0, 0 },
+                                           { 43, 157, 74, 104, 146, 0, 0 },
+                                           { 54, 138, 51, 95, 138, 0, 0 },
+                                           { 82, 171, 58, 102, 146, 0, 0 },
+                                           { 129, 38, 59, 64, 168, 0, 0 },
+                                           { 56, 67, 119, 92, 112, 0, 0 },
+                                           { 96, 62, 53, 132, 82, 0, 0 },
+                                           { 60, 147, 77, 108, 145, 0, 0 },
+                                           { 238, 76, 73, 93, 148, 0, 0 },
+                                           { 189, 86, 73, 103, 157, 0, 0 },
+                                           { 246, 62, 75, 83, 167, 0, 0 },
+                                       },
+                                       {
+                                           // 7 colors
+                                           { 179, 42, 51, 73, 99, 134, 0 },
+                                           { 119, 52, 52, 61, 64, 114, 0 },
+                                           { 53, 77, 35, 65, 71, 131, 0 },
+                                           { 38, 70, 51, 68, 89, 144, 0 },
+                                           { 23, 65, 128, 73, 97, 131, 0 },
+                                           { 210, 47, 52, 63, 81, 143, 0 },
+                                           { 42, 159, 57, 68, 98, 143, 0 },
+                                           { 49, 153, 45, 82, 93, 143, 0 },
+                                           { 81, 169, 52, 72, 113, 151, 0 },
+                                           { 136, 46, 35, 56, 75, 96, 0 },
+                                           { 57, 84, 109, 47, 107, 131, 0 },
+                                           { 128, 78, 57, 36, 128, 85, 0 },
+                                           { 54, 149, 68, 77, 94, 153, 0 },
+                                           { 243, 58, 50, 71, 81, 167, 0 },
+                                           { 189, 92, 64, 70, 121, 173, 0 },
+                                           { 248, 35, 38, 51, 82, 201, 0 },
+                                       },
+                                       {
+                                           // 8 colors
+                                           { 201, 40, 36, 42, 64, 92, 123 },
+                                           { 116, 43, 33, 43, 73, 102, 128 },
+                                           { 46, 77, 37, 69, 62, 78, 150 },
+                                           { 40, 65, 52, 50, 76, 89, 133 },
+                                           { 28, 48, 91, 17, 64, 77, 133 },
+                                           { 218, 43, 43, 37, 56, 72, 163 },
+                                           { 41, 155, 44, 83, 82, 129, 180 },
+                                           { 44, 141, 29, 55, 64, 89, 147 },
+                                           { 92, 166, 48, 45, 59, 126, 179 },
+                                           { 169, 35, 49, 41, 36, 99, 139 },
+                                           { 55, 77, 77, 56, 60, 75, 156 },
+                                           { 155, 81, 51, 64, 57, 182, 255 },
+                                           { 60, 134, 49, 49, 93, 128, 174 },
+                                           { 244, 98, 51, 46, 22, 73, 238 },
+                                           { 189, 70, 40, 87, 93, 79, 201 },
+                                           { 248, 54, 49, 40, 29, 42, 227 },
+                                       }
+                                     };
 
 static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = {
   // (3, 0, 0, 0), (3, 2, 0, 0), (3, 3, 2, 0), (3, 3, 2, 2),
@@ -848,23 +851,23 @@
 };
 #endif  // CONFIG_PALETTE
 
-const aom_tree_index av1_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)] = {
+const aom_tree_index av1_tx_size_tree[MAX_TX_DEPTH][TREE_SIZE(TX_SIZES)] = {
   {
       // Max tx_size is 8X8
-      -TX_4X4, -TX_8X8,
+      -0, -1,
   },
   {
       // Max tx_size is 16X16
-      -TX_4X4, 2, -TX_8X8, -TX_16X16,
+      -0, 2, -1, -2,
   },
   {
       // Max tx_size is 32X32
-      -TX_4X4, 2, -TX_8X8, 4, -TX_16X16, -TX_32X32,
+      -0, 2, -1, 4, -2, -3,
   },
 };
 
-static const aom_prob default_tx_size_prob[TX_SIZES - 1][TX_SIZE_CONTEXTS]
-                                          [TX_SIZES - 1] = {
+static const aom_prob default_tx_size_prob[MAX_TX_DEPTH][TX_SIZE_CONTEXTS]
+                                          [MAX_TX_DEPTH] = {
                                             {
                                                 // Max tx_size is 8X8
                                                 { 100 },
@@ -902,30 +905,24 @@
 int av1_get_palette_color_context(const uint8_t *color_map, int cols, int r,
                                   int c, int n, uint8_t *color_order,
                                   int *color_idx) {
-  int i, j, max, max_idx, temp;
+  int i;
+  // The +10 below should not be needed. But we get a warning "array subscript
+  // is above array bounds [-Werror=array-bounds]" without it, possibly due to
+  // this (or similar) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
   int scores[PALETTE_MAX_SIZE + 10];
-  int weights[4] = { 3, 2, 3, 2 };
+  const int weights[4] = { 3, 2, 3, 2 };
   int color_ctx = 0;
   int color_neighbors[4];
   int inverse_color_order[PALETTE_MAX_SIZE];
   assert(n <= PALETTE_MAX_SIZE);
 
-  if (c - 1 >= 0)
-    color_neighbors[0] = color_map[r * cols + c - 1];
-  else
-    color_neighbors[0] = -1;
-  if (c - 1 >= 0 && r - 1 >= 0)
-    color_neighbors[1] = color_map[(r - 1) * cols + c - 1];
-  else
-    color_neighbors[1] = -1;
-  if (r - 1 >= 0)
-    color_neighbors[2] = color_map[(r - 1) * cols + c];
-  else
-    color_neighbors[2] = -1;
-  if (r - 1 >= 0 && c + 1 <= cols - 1)
-    color_neighbors[3] = color_map[(r - 1) * cols + c + 1];
-  else
-    color_neighbors[3] = -1;
+  color_neighbors[0] = (c - 1 >= 0) ? color_map[r * cols + c - 1] : -1;
+  color_neighbors[1] =
+      (c - 1 >= 0 && r - 1 >= 0) ? color_map[(r - 1) * cols + c - 1] : -1;
+  color_neighbors[2] = (r - 1 >= 0) ? color_map[(r - 1) * cols + c] : -1;
+  color_neighbors[3] = (r - 1 >= 0 && c + 1 <= cols - 1)
+                           ? color_map[(r - 1) * cols + c + 1]
+                           : -1;
 
   for (i = 0; i < PALETTE_MAX_SIZE; ++i) {
     color_order[i] = i;
@@ -933,23 +930,25 @@
   }
   memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0]));
   for (i = 0; i < 4; ++i) {
-    if (color_neighbors[i] >= 0) scores[color_neighbors[i]] += weights[i];
+    if (color_neighbors[i] >= 0) {
+      scores[color_neighbors[i]] += weights[i];
+    }
   }
 
+  // Get the top 4 scores (sorted from large to small).
   for (i = 0; i < 4; ++i) {
-    max = scores[i];
-    max_idx = i;
-    j = i + 1;
-    while (j < n) {
+    int max = scores[i];
+    int max_idx = i;
+    int j;
+    for (j = i + 1; j < n; ++j) {
       if (scores[j] > max) {
         max = scores[j];
         max_idx = j;
       }
-      ++j;
     }
 
     if (max_idx != i) {
-      temp = scores[i];
+      int temp = scores[i];
       scores[i] = scores[max_idx];
       scores[max_idx] = temp;
 
@@ -1310,13 +1309,21 @@
 
 static const aom_prob
     default_intra_ext_tx_prob[EXT_TX_SIZES][TX_TYPES][TX_TYPES - 1] = {
+#if CONFIG_CB4X4
+      { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
+#endif
       { { 240, 85, 128 }, { 4, 1, 248 }, { 4, 1, 8 }, { 4, 248, 128 } },
       { { 244, 85, 128 }, { 8, 2, 248 }, { 8, 2, 8 }, { 8, 248, 128 } },
       { { 248, 85, 128 }, { 16, 4, 248 }, { 16, 4, 8 }, { 16, 248, 128 } },
     };
 
 static const aom_prob default_inter_ext_tx_prob[EXT_TX_SIZES][TX_TYPES - 1] = {
-  { 160, 85, 128 }, { 176, 85, 128 }, { 192, 85, 128 },
+#if CONFIG_CB4X4
+  { 160, 85, 128 },
+#endif
+  { 160, 85, 128 },
+  { 176, 85, 128 },
+  { 192, 85, 128 },
 };
 #endif  // CONFIG_EXT_TX
 
@@ -1561,7 +1568,7 @@
 
 #if CONFIG_VAR_TX && CONFIG_EXT_TX && CONFIG_RECT_TX
   if (cm->tx_mode == TX_MODE_SELECT) {
-    for (i = 0; i < TX_SIZES - 1; ++i) {
+    for (i = 0; i < MAX_TX_DEPTH; ++i) {
       fc->rect_tx_prob[i] =
           av1_mode_mv_merge_probs(pre_fc->rect_tx_prob[i], counts->rect_tx[i]);
     }
@@ -1593,7 +1600,7 @@
   const FRAME_COUNTS *counts = &cm->counts;
 
   if (cm->tx_mode == TX_MODE_SELECT) {
-    for (i = 0; i < TX_SIZES - 1; ++i) {
+    for (i = 0; i < MAX_TX_DEPTH; ++i) {
       for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
         aom_tree_merge_probs(av1_tx_size_tree[i], pre_fc->tx_size_probs[i][j],
                              counts->tx_size[i][j], fc->tx_size_probs[i][j]);
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 3043114..79b27da 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -125,7 +125,7 @@
 #else
   aom_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS - 1];
 #endif  // CONFIG_EXT_REFS
-  aom_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1];
+  aom_prob tx_size_probs[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][MAX_TX_DEPTH];
 #if CONFIG_VAR_TX
   aom_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -237,7 +237,7 @@
   // to use forward updates for the coeff probs, and as such it does not really
   // belong into this structure.
   unsigned int tx_size_totals[TX_SIZES];
-  unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
+  unsigned int tx_size[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][TX_SIZES];
 #if CONFIG_VAR_TX
   unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -324,7 +324,7 @@
 extern const aom_tree_index av1_palette_color_tree[PALETTE_MAX_SIZE - 1]
                                                   [TREE_SIZE(PALETTE_COLORS)];
 #endif  // CONFIG_PALETTE
-extern const aom_tree_index av1_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)];
+extern const aom_tree_index av1_tx_size_tree[MAX_TX_DEPTH][TREE_SIZE(TX_SIZES)];
 #if CONFIG_EXT_INTRA
 extern const aom_tree_index av1_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)];
 #endif  // CONFIG_EXT_INTRA
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 0a1f7a3..a684eed 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -131,6 +131,9 @@
 
 // block transform size
 typedef enum ATTRIBUTE_PACKED {
+#if CONFIG_CB4X4
+  TX_2X2,  // 2x2 transform
+#endif
   TX_4X4,    // 4x4 transform
   TX_8X8,    // 8x8 transform
   TX_16X16,  // 16x16 transform
@@ -148,6 +151,8 @@
   TX_INVALID = 255          // Invalid transform size
 } TX_SIZE;
 
+#define MAX_TX_DEPTH (TX_32X32 - TX_4X4)
+
 #define MAX_TX_SIZE_LOG2 5
 #define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
 #define MIN_TX_SIZE_LOG2 2
@@ -206,8 +211,12 @@
 #define EXT_TX_SETS_INTER 4  // Sets of transform selections for INTER
 #define EXT_TX_SETS_INTRA 3  // Sets of transform selections for INTRA
 #else
+#if CONFIG_CB4X4
+#define EXT_TX_SIZES 4  // number of sizes that use extended transforms
+#else
 #define EXT_TX_SIZES 3  // number of sizes that use extended transforms
-#endif                  // CONFIG_EXT_TX
+#endif
+#endif  // CONFIG_EXT_TX
 
 typedef enum {
   AOM_LAST_FLAG = 1 << 0,
diff --git a/av1/common/loopfilter.c b/av1/common/loopfilter.c
index dc7ee18..dec5514 100644
--- a/av1/common/loopfilter.c
+++ b/av1/common/loopfilter.c
@@ -40,6 +40,9 @@
 //
 // A loopfilter should be applied to every other 8x8 horizontally.
 static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
+#if CONFIG_CB4X4
+  0xffffffffffffffffULL,  // TX_2X2
+#endif
   0xffffffffffffffffULL,  // TX_4X4
   0xffffffffffffffffULL,  // TX_8x8
   0x5555555555555555ULL,  // TX_16x16
@@ -64,6 +67,9 @@
 //
 // A loopfilter should be applied to every other 4 the row vertically.
 static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
+#if CONFIG_CB4X4
+  0xffffffffffffffffULL,  // TX_2X2
+#endif
   0xffffffffffffffffULL,  // TX_4X4
   0xffffffffffffffffULL,  // TX_8x8
   0x00ff00ff00ff00ffULL,  // TX_16x16
@@ -142,6 +148,9 @@
 
 // 16 bit masks for uv transform sizes.
 static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
+#if CONFIG_CB4X4
+  0xffff,  // TX_2X2
+#endif
   0xffff,  // TX_4X4
   0xffff,  // TX_8x8
   0x5555,  // TX_16x16
@@ -149,6 +158,9 @@
 };
 
 static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
+#if CONFIG_CB4X4
+  0xffff,  // TX_2X2
+#endif
   0xffff,  // TX_4X4
   0xffff,  // TX_8x8
   0x0f0f,  // TX_16x16
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index be1cbc1..f9b51aa 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -733,7 +733,8 @@
                                          TX_SIZE tx_size) {
   int above = *above_ctx < tx_size;
   int left = *left_ctx < tx_size;
-  return (tx_size - 1) * 3 + above + left;
+
+  return (tx_size - TX_8X8) * 3 + above + left;
 }
 #endif
 
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
index b3ef1c4..b906749 100644
--- a/av1/common/pred_common.h
+++ b/av1/common/pred_common.h
@@ -186,8 +186,13 @@
   if (!has_left) left_ctx = above_ctx;
 
   if (!has_above) above_ctx = left_ctx;
-
+#if CONFIG_CB4X4
+  // TODO(jingning): Temporary setup. Will rework this after the cb4x4
+  // framework is up running.
+  return (above_ctx + left_ctx) > max_tx_size + 1;
+#else
   return (above_ctx + left_ctx) > max_tx_size;
+#endif
 }
 
 #if CONFIG_VAR_TX
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 3eec384..5082d7b 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -418,7 +418,6 @@
                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                           const struct scale_factors *sf);
 
-#if CONFIG_DUAL_FILTER
 // Detect if the block have sub-pixel level motion vectors
 // per component.
 static INLINE int has_subpel_mv_component(const MODE_INFO *const mi,
@@ -460,60 +459,22 @@
 
   return 0;
 }
-#endif
 
-#if CONFIG_EXT_INTERP
 static INLINE int av1_is_interp_needed(const MACROBLOCKD *const xd) {
   MODE_INFO *const mi = xd->mi[0];
-  MB_MODE_INFO *const mbmi = &mi->mbmi;
-  const BLOCK_SIZE bsize = mbmi->sb_type;
-  const int is_compound = has_second_ref(mbmi);
-  int intpel_mv = 1;
-  int plane;
-
-#if SUPPORT_NONINTERPOLATING_FILTERS
-  // TODO(debargha): This is is currently only for experimentation
-  // with non-interpolating filters. Remove later.
-  // If any of the filters are non-interpolating, then indicate the
-  // interpolation filter always.
-  int i;
-  for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
-    if (!IsInterpolatingFilter(i)) return 1;
-  }
-#endif
-
-  // For scaled references, interpolation filter is indicated all the time.
-  if (av1_is_scaled(&xd->block_refs[0]->sf)) return 1;
-  if (is_compound && av1_is_scaled(&xd->block_refs[1]->sf)) return 1;
-
-  if (bsize < BLOCK_8X8) {
-    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
-      const PARTITION_TYPE bp = BLOCK_8X8 - bsize;
-      const struct macroblockd_plane *const pd = &xd->plane[plane];
-      const int have_vsplit = bp != PARTITION_HORZ;
-      const int have_hsplit = bp != PARTITION_VERT;
-      const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
-      const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
-      int ref;
-      for (ref = 0; ref < 1 + is_compound; ++ref) {
-        int x, y;
-        for (y = 0; y < num_4x4_h; ++y)
-          for (x = 0; x < num_4x4_w; ++x) {
-            const MV mv = average_split_mvs(pd, mi, ref, y * 2 + x);
-            if (mv_has_subpel(&mv)) return 1;
-          }
+  const int is_compound = has_second_ref(&mi->mbmi);
+  int ref;
+  for (ref = 0; ref < 1 + is_compound; ++ref) {
+    int row_col;
+    for (row_col = 0; row_col < 2; ++row_col) {
+      const int dir = (ref << 1) + row_col;
+      if (has_subpel_mv_component(mi, xd, dir)) {
+        return 1;
       }
     }
-    return 0;
-  } else {
-    intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
-    if (is_compound && intpel_mv) {
-      intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
-    }
   }
-  return !intpel_mv;
+  return 0;
 }
-#endif  // CONFIG_EXT_INTERP
 
 #if CONFIG_MOTION_VAR
 const uint8_t *av1_get_obmc_mask(int length);
diff --git a/av1/common/scan.c b/av1/common/scan.c
index 1281843..693386f 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -3801,6 +3801,9 @@
 #endif  // CONFIG_EXT_TX
 
 const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = {
+#if CONFIG_CB4X4
+  { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+#endif
   { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
   { default_scan_8x8, av1_default_iscan_8x8, default_scan_8x8_neighbors },
   { default_scan_16x16, av1_default_iscan_16x16, default_scan_16x16_neighbors },
@@ -3809,6 +3812,27 @@
 
 #if CONFIG_EXT_TX
 const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+#if CONFIG_CB4X4
+  {
+      // TX_2X2
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+      { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+      { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+      { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+      { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+      { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+      { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+  },
+#endif
   {
       // TX_4X4
       { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -3896,6 +3920,27 @@
 };
 
 const SCAN_ORDER av1_inter_scan_orders[TX_SIZES_ALL][TX_TYPES] = {
+#if CONFIG_CB4X4
+  {
+      // TX_2X2
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+      { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+      { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+      { mrow_scan_4x4, av1_mrow_iscan_4x4, mrow_scan_4x4_neighbors },
+      { mcol_scan_4x4, av1_mcol_iscan_4x4, mcol_scan_4x4_neighbors },
+  },
+#endif
   {
       // TX_4X4
       { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
@@ -4134,9 +4179,16 @@
   }
 };
 
-#else   // CONFIG_EXT_TX
+#else  // CONFIG_EXT_TX
 
 const SCAN_ORDER av1_intra_scan_orders[TX_SIZES][TX_TYPES] = {
+#if CONFIG_CB4X4
+  { // TX_2X2
+    { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
+    { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
+    { col_scan_4x4, av1_col_iscan_4x4, col_scan_4x4_neighbors },
+    { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors } },
+#endif
   { // TX_4X4
     { default_scan_4x4, av1_default_iscan_4x4, default_scan_4x4_neighbors },
     { row_scan_4x4, av1_row_iscan_4x4, row_scan_4x4_neighbors },
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 8ab549b..b3617d1 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -3661,7 +3661,7 @@
 #endif
 
   if (cm->tx_mode == TX_MODE_SELECT) {
-    for (i = 0; i < TX_SIZES - 1; ++i)
+    for (i = 0; i < MAX_TX_DEPTH; ++i)
       for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
         for (k = 0; k < i + 1; ++k)
           av1_diff_update_prob(&r, &fc->tx_size_probs[i][j][k], ACCT_STR);
@@ -3674,7 +3674,7 @@
     av1_diff_update_prob(&r, &fc->txfm_partition_prob[k], ACCT_STR);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
   if (cm->tx_mode == TX_MODE_SELECT) {
-    for (i = 1; i < TX_SIZES - 1; ++i)
+    for (i = 1; i < MAX_TX_DEPTH; ++i)
       av1_diff_update_prob(&r, &fc->rect_tx_prob[i], ACCT_STR);
   }
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 3993e72..31183c0 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -350,11 +350,11 @@
                                      int tx_size_cat, aom_reader *r) {
   FRAME_COUNTS *counts = xd->counts;
   const int ctx = get_tx_size_context(xd);
-  int tx_size =
-      aom_read_tree(r, av1_tx_size_tree[tx_size_cat],
-                    cm->fc->tx_size_probs[tx_size_cat][ctx], ACCT_STR);
-  if (counts) ++counts->tx_size[tx_size_cat][ctx][tx_size];
-  return (TX_SIZE)tx_size;
+  int depth = aom_read_tree(r, av1_tx_size_tree[tx_size_cat],
+                            cm->fc->tx_size_probs[tx_size_cat][ctx], ACCT_STR);
+  TX_SIZE tx_size = depth_to_tx_size(depth);
+  if (counts) ++counts->tx_size[tx_size_cat][ctx][depth];
+  return tx_size;
 }
 
 static TX_SIZE read_tx_size_intra(AV1_COMMON *cm, MACROBLOCKD *xd,
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 377af50..899aa81 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -74,7 +74,7 @@
 static struct av1_token palette_color_encodings[PALETTE_MAX_SIZE - 1]
                                                [PALETTE_MAX_SIZE];
 #endif  // CONFIG_PALETTE
-static const struct av1_token tx_size_encodings[TX_SIZES - 1][TX_SIZES] = {
+static const struct av1_token tx_size_encodings[MAX_TX_DEPTH][TX_SIZES] = {
   { { 0, 1 }, { 1, 1 } },                      // Max tx_size is 8X8
   { { 0, 1 }, { 2, 2 }, { 3, 2 } },            // Max tx_size is 16X16
   { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } },  // Max tx_size is 32X32
@@ -414,6 +414,7 @@
     const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
                                      : intra_tx_size_cat_lookup[bsize];
     const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
+    const int depth = tx_size_to_depth(coded_tx_size);
 
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
     assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
@@ -423,7 +424,7 @@
 
     av1_write_token(w, av1_tx_size_tree[tx_size_cat],
                     cm->fc->tx_size_probs[tx_size_cat][tx_size_ctx],
-                    &tx_size_encodings[tx_size_cat][coded_tx_size]);
+                    &tx_size_encodings[tx_size_cat][depth]);
   }
 }
 
@@ -2873,7 +2874,7 @@
                               FRAME_COUNTS *counts) {
   if (cm->tx_mode == TX_MODE_SELECT) {
     int i, j;
-    for (i = 0; i < TX_SIZES - 1; ++i)
+    for (i = 0; i < MAX_TX_DEPTH; ++i)
       for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
         prob_diff_update(av1_tx_size_tree[i], cm->fc->tx_size_probs[i][j],
                          counts->tx_size[i][j], i + 2, w);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 3733efc..c94c1d8 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4442,8 +4442,10 @@
   av1_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end);
 
   // Set up pointers to per thread motion search counters.
-  td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
-  td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
+  this_tile->m_search_count = 0;   // Count of motion search hits.
+  this_tile->ex_search_count = 0;  // Exhaustive mesh search hits.
+  td->mb.m_search_count_ptr = &this_tile->m_search_count;
+  td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
 
   for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
        mi_row += cm->mib_size) {
@@ -4484,10 +4486,35 @@
 #define MIN_TRANS_THRESH 8
 #define GLOBAL_MOTION_ADVANTAGE_THRESH 0.60
 #define GLOBAL_MOTION_MODEL ROTZOOM
-// TODO(sarahparker) This function needs to be adjusted
-// to accomodate changes in the paraemter integerization.
-// Commenting it out until the fix is made.
-/*
+
+// Adds some offset to a global motion parameter and handles
+// all of the necessary precision shifts, clamping, and
+// zero-centering.
+static int16_t add_param_offset(int param_index, int16_t param_value,
+                                int16_t offset) {
+  int scale_vals[2] = { GM_ALPHA_PREC_DIFF, GM_TRANS_PREC_DIFF };
+  int clamp_vals[2] = { GM_ALPHA_MAX, GM_TRANS_MAX };
+  int is_trans_param = param_index < 2;
+  int is_one_centered = (!is_trans_param) && (param_index & 1);
+
+  // Make parameter zero-centered and offset the shift that was done to make
+  // it compatible with the warped model
+  param_value = (param_value - (is_one_centered << WARPEDMODEL_PREC_BITS)) >>
+                scale_vals[is_trans_param];
+  // Add desired offset to the rescaled/zero-centered parameter
+  param_value += offset;
+  // Clamp the parameter so it does not overflow the number of bits allotted
+  // to it in the bitstream
+  param_value = (int16_t)clamp(param_value, -clamp_vals[is_trans_param],
+                               clamp_vals[is_trans_param]);
+  // Rescale the parameter to WARPEDMODEL_PREC_BITS so it is compatible
+  // with the warped motion library
+  param_value *= (1 << scale_vals[is_trans_param]);
+
+  // Undo the zero-centering step if necessary
+  return param_value + (is_one_centered << WARPEDMODEL_PREC_BITS);
+}
+
 static void refine_integerized_param(WarpedMotionParams *wm,
 #if CONFIG_AOM_HIGHBITDEPTH
                                      int use_hbd, int bd,
@@ -4500,7 +4527,7 @@
   int n_params = n_trans_model_params[wm->wmtype];
   int16_t *param_mat = (int16_t *)wm->wmmat;
   double step_error;
-  int step;
+  int16_t step;
   int16_t *param;
   int16_t curr_param;
   int16_t best_param;
@@ -4519,9 +4546,7 @@
     best_param = curr_param;
     for (i = 0; i < n_refinements; i++) {
       // look to the left
-      *param =
-          (int16_t)clamp(curr_param - step, p < 2 ? GM_TRANS_MIN : GM_ALPHA_MIN,
-                         p < 2 ? GM_TRANS_MAX : GM_ALPHA_MAX);
+      *param = add_param_offset(p, curr_param, -step);
       step_error =
           av1_warp_erroradv(wm,
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -4538,9 +4563,7 @@
       }
 
       // look to the right
-      *param =
-          (int16_t)clamp(curr_param + step, p < 2 ? GM_TRANS_MIN : GM_ALPHA_MIN,
-                         p < 2 ? GM_TRANS_MAX : GM_ALPHA_MAX);
+      *param = add_param_offset(p, curr_param, step);
       step_error =
           av1_warp_erroradv(wm,
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -4564,7 +4587,6 @@
     *param = best_param;
   }
 }
-*/
 
 static void convert_to_params(const double *params, TransformationType type,
                               int16_t *model) {
@@ -4624,8 +4646,6 @@
   av1_zero(*td->counts);
   av1_zero(rdc->coef_counts);
   av1_zero(rdc->comp_pred_diff);
-  rdc->m_search_count = 0;   // Count of motion search hits.
-  rdc->ex_search_count = 0;  // Exhaustive mesh search hits.
 
 #if CONFIG_GLOBAL_MOTION
   aom_clear_system_state();
@@ -4643,6 +4663,14 @@
           convert_model_to_params(params, GLOBAL_MOTION_MODEL,
                                   &cm->global_motion[frame]);
           if (get_gmtype(&cm->global_motion[frame]) > GLOBAL_ZERO) {
+            refine_integerized_param(
+                &cm->global_motion[frame].motion_params,
+#if CONFIG_AOM_HIGHBITDEPTH
+                xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd,
+#endif  // CONFIG_AOM_HIGHBITDEPTH
+                ref_buf->y_buffer, ref_buf->y_width, ref_buf->y_height,
+                ref_buf->y_stride, cpi->Source->y_buffer, cpi->Source->y_width,
+                cpi->Source->y_height, cpi->Source->y_stride, 3);
             // compute the advantage of using gm parameters over 0 motion
             erroradvantage = av1_warp_erroradv(
                 &cm->global_motion[frame].motion_params,
@@ -5206,6 +5234,7 @@
       const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
                                        : intra_tx_size_cat_lookup[bsize];
       const TX_SIZE coded_tx_size = txsize_sqr_up_map[mbmi->tx_size];
+      const int depth = tx_size_to_depth(coded_tx_size);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
       assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -5219,7 +5248,7 @@
         if (is_inter) {
           tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts);
         } else {
-          ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
+          ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
           if (mbmi->tx_size != max_txsize_lookup[bsize]) ++x->txb_split_count;
         }
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -5227,7 +5256,7 @@
 #endif
 #endif
 #if !CONFIG_VAR_TX
-      ++td->counts->tx_size[tx_size_cat][tx_size_ctx][coded_tx_size];
+      ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
 #endif
     } else {
       int i, j;
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 6b7e72c..c5459dc 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -81,7 +81,7 @@
   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   const int eob = p->eobs[block];
   const PLANE_TYPE plane_type = pd->plane_type;
-  const int default_eob = get_tx2d_size(tx_size);
+  const int default_eob = tx_size_2d[tx_size];
   const int16_t *const dequant_ptr = pd->dequant;
   const uint8_t *const band_translate = get_band_translate(tx_size);
   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
@@ -826,12 +826,9 @@
   }
 
 #if CONFIG_VAR_TX
-  for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i) {
-    a[i] = a[0];
-  }
-  for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i) {
-    l[i] = l[0];
-  }
+  for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) a[i] = a[0];
+
+  for (i = 0; i < tx_size_high_unit[tx_size]; ++i) l[i] = l[0];
 #endif
 
   if (p->eobs[block]) *(args->skip) = 0;
@@ -885,25 +882,23 @@
   if (tx_size == plane_tx_size) {
     encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
   } else {
-    int bsl = b_width_log2_lookup[bsize];
+    int bsl = block_size_wide[bsize] >> (tx_size_wide_log2[0] + 1);
     int i;
-
     assert(bsl > 0);
-    --bsl;
-
 #if CONFIG_EXT_TX
     assert(tx_size < TX_SIZES);
 #endif  // CONFIG_EXT_TX
 
     for (i = 0; i < 4; ++i) {
-      const int offsetr = blk_row + ((i >> 1) << bsl);
-      const int offsetc = blk_col + ((i & 0x01) << bsl);
-      int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+      const int offsetr = blk_row + ((i >> 1) * bsl);
+      const int offsetc = blk_col + ((i & 0x01) * bsl);
+      const TX_SIZE sub_txs = tx_size - 1;
+      int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
 
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
       encode_block_inter(plane, block + i * step, offsetr, offsetc, plane_bsize,
-                         tx_size - 1, arg);
+                         sub_txs, arg);
     }
   }
 }
@@ -983,14 +978,15 @@
     // TODO(jingning): Clean this up.
     const struct macroblockd_plane *const pd = &xd->plane[plane];
     const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
-    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
+    const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
     const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
     const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
-    const int bh = num_4x4_blocks_wide_lookup[txb_size];
+    const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
+    const int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
     int idx, idy;
     int block = 0;
-    int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
     av1_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]);
 #else
     const struct macroblockd_plane *const pd = &xd->plane[plane];
@@ -1009,7 +1005,7 @@
     } else {
 #endif
       for (idy = 0; idy < mi_height; idy += bh) {
-        for (idx = 0; idx < mi_width; idx += bh) {
+        for (idx = 0; idx < mi_width; idx += bw) {
           encode_block_inter(plane, block, idy, idx, plane_bsize, max_tx_size,
                              &arg);
           block += step;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 629eb46..03f6ffd 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -3596,7 +3596,7 @@
   recon_err = aom_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
 
   if (cpi->twopass.total_left_stats.coded_error != 0.0)
-    fprintf(f, "%10u %dx%d  %10d %10d %d %d %10d %10d %10d %10d"
+    fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d"
        "%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
        "%10"PRId64" %10"PRId64" %10d "
        "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
@@ -3605,8 +3605,6 @@
         "%10lf %8u %10"PRId64" %10d %10d %10d\n",
         cpi->common.current_video_frame,
         cm->width, cm->height,
-        cpi->td.rd_counts.m_search_count,
-        cpi->td.rd_counts.ex_search_count,
         cpi->rc.source_alt_ref_pending,
         cpi->rc.source_alt_ref_active,
         cpi->rc.this_frame_target,
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 8738609..00abc71 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -268,13 +268,13 @@
   TileInfo tile_info;
   int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
   int mode_map[BLOCK_SIZES][MAX_MODES];
+  int m_search_count;
+  int ex_search_count;
 } TileDataEnc;
 
 typedef struct RD_COUNTS {
   av1_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
   int64_t comp_pred_diff[REFERENCE_MODES];
-  int m_search_count;
-  int ex_search_count;
 } RD_COUNTS;
 
 typedef struct ThreadData {
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 117d0ed..5876d15 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -27,10 +27,6 @@
             for (n = 0; n < ENTROPY_TOKENS; n++)
               td->rd_counts.coef_counts[i][j][k][l][m][n] +=
                   td_t->rd_counts.coef_counts[i][j][k][l][m][n];
-
-  // Counts of all motion searches and exhuastive mesh searches.
-  td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
-  td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
 }
 
 static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 5015837..cfc8cb3 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -110,7 +110,7 @@
   }
 #endif  // CONFIG_PALETTE
 
-  for (i = 0; i < TX_SIZES - 1; ++i)
+  for (i = 0; i < MAX_TX_DEPTH; ++i)
     for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
       av1_cost_tokens(cpi->tx_size_cost[i][j], fc->tx_size_probs[i][j],
                       av1_tx_size_tree[i]);
@@ -720,30 +720,41 @@
 
 #if CONFIG_DUAL_FILTER
 int av1_get_switchable_rate(const AV1_COMP *cpi, const MACROBLOCKD *const xd) {
-  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-  int inter_filter_cost = 0;
-  int dir;
+  const AV1_COMMON *const cm = &cpi->common;
+  if (cm->interp_filter == SWITCHABLE) {
+    const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+    int inter_filter_cost = 0;
+    int dir;
 
-  for (dir = 0; dir < 2; ++dir) {
-    if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
-        (mbmi->ref_frame[1] > INTRA_FRAME &&
-         has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
-      const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
-      inter_filter_cost +=
-          cpi->switchable_interp_costs[ctx][mbmi->interp_filter[dir]];
+    for (dir = 0; dir < 2; ++dir) {
+      if (has_subpel_mv_component(xd->mi[0], xd, dir) ||
+          (mbmi->ref_frame[1] > INTRA_FRAME &&
+           has_subpel_mv_component(xd->mi[0], xd, dir + 2))) {
+        const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
+        inter_filter_cost +=
+            cpi->switchable_interp_costs[ctx][mbmi->interp_filter[dir]];
+      }
     }
+    return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
+  } else {
+    return 0;
   }
-  return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
 }
 #else
 int av1_get_switchable_rate(const AV1_COMP *cpi, const MACROBLOCKD *const xd) {
-  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-  const int ctx = av1_get_pred_context_switchable_interp(xd);
+  const AV1_COMMON *const cm = &cpi->common;
+  if (cm->interp_filter == SWITCHABLE) {
 #if CONFIG_EXT_INTERP
-  if (!av1_is_interp_needed(xd)) return 0;
-#endif  // CONFIG_EXT_INTERP
-  return SWITCHABLE_INTERP_RATE_FACTOR *
-         cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+    if (av1_is_interp_needed(xd))
+#endif
+    {
+      const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+      const int ctx = av1_get_pred_context_switchable_interp(xd);
+      return SWITCHABLE_INTERP_RATE_FACTOR *
+             cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+    }
+  }
+  return 0;
 }
 #endif
 
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 76d471e..c6b9979 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1318,9 +1318,9 @@
   const int tx_size_cat =
       is_inter ? inter_tx_size_cat_lookup[bs] : intra_tx_size_cat_lookup[bs];
   const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
+  const int depth = tx_size_to_depth(coded_tx_size);
   const int tx_select = cm->tx_mode == TX_MODE_SELECT;
-  const int r_tx_size =
-      cpi->tx_size_cost[tx_size_cat][tx_size_ctx][coded_tx_size];
+  const int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
 
   assert(skip_prob > 0);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
@@ -2746,7 +2746,7 @@
       // not the tokenonly rate.
       this_rate_tokenonly -=
           cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
-                           [mic->mbmi.tx_size];
+                           [tx_size_to_depth(mic->mbmi.tx_size)];
     }
 #if CONFIG_PALETTE
     if (cpi->common.allow_screen_content_tools && mic->mbmi.mode == DC_PRED)
@@ -3222,7 +3222,8 @@
     int bh = num_4x4_blocks_wide_lookup[txb_size];
     int idx, idy;
     int block = 0;
-    int step = 1 << (max_txsize_lookup[plane_bsize] * 2);
+    int step = tx_size_wide_unit[max_txsize_lookup[plane_bsize]] *
+               tx_size_high_unit[max_txsize_lookup[plane_bsize]];
     ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
     ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
     TXFM_CONTEXT tx_above[MAX_MIB_SIZE];
@@ -3519,6 +3520,7 @@
     coeff_ctx = combine_entropy_contexts(ta[0], tl[0]);
     av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                       plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+
     for (i = 0; i < num_4x4_blocks_wide_txsize_lookup[tx_size]; ++i)
       ta[i] = !(p->eobs[block] == 0);
     for (i = 0; i < num_4x4_blocks_high_txsize_lookup[tx_size]; ++i)
@@ -6367,10 +6369,6 @@
   const int this_mode = mbmi->mode;
   int refs[2] = { mbmi->ref_frame[0],
                   (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
-#if CONFIG_DUAL_FILTER
-  (void)pred_filter_search;
-  return SWITCHABLE;
-#else
   if (pred_filter_search) {
     InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
     if (xd->up_available) af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
@@ -6385,7 +6383,6 @@
 #endif  // CONFIG_EXT_INTER
       best_filter = af;
   }
-#endif
   if (is_comp_pred) {
     if (cpi->sf.adaptive_mode_search) {
 #if CONFIG_EXT_INTER
@@ -6448,15 +6445,8 @@
 #endif  // CONFIG_EXT_INTER
     }
   }
-  if (cm->interp_filter != BILINEAR) {
-    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
-      best_filter = EIGHTTAP_REGULAR;
-    }
-#if CONFIG_EXT_INTERP
-    else if (!av1_is_interp_needed(xd) && cm->interp_filter == SWITCHABLE) {
-      best_filter = EIGHTTAP_REGULAR;
-    }
-#endif
+  if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
+    best_filter = EIGHTTAP_REGULAR;
   }
   return best_filter;
 }
@@ -6676,6 +6666,7 @@
   int_mv cur_mv[2];
   int rate_mv = 0;
 #if CONFIG_EXT_INTER
+  int pred_exists = 1;
   const int bw = 4 * num_4x4_blocks_wide_lookup[bsize];
   int mv_idx = (this_mode == NEWFROMNEARMV) ? 1 : 0;
   int_mv single_newmv[TOTAL_REFS_PER_FRAME];
@@ -6706,6 +6697,7 @@
   uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
 #endif  // CONFIG_VAR_TX
   int64_t best_distortion = INT64_MAX;
+  int64_t best_rd = INT64_MAX;
   MB_MODE_INFO best_mbmi;
 #if CONFIG_EXT_INTER
   int rate2_bmc_nocoeff;
@@ -6713,24 +6705,14 @@
   MB_MODE_INFO best_bmc_mbmi;
 #endif  // CONFIG_EXT_INTER
 #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
-
-  int pred_exists = 0;
-  int intpel_mv;
-  int64_t rd, tmp_rd, best_rd = INT64_MAX;
-  int best_needs_copy = 0;
+  int64_t rd = INT64_MAX;
+  int64_t tmp_rd = INT64_MAX;
   uint8_t *orig_dst[MAX_MB_PLANE];
   int orig_dst_stride[MAX_MB_PLANE];
+  uint8_t *tmp_dst[MAX_MB_PLANE];
+  int tmp_dst_stride[MAX_MB_PLANE];
   int rs = 0;
-#if CONFIG_DUAL_FILTER
-  // Index use case:
-  // {0, 1} -> (vertical, horizontal) filter types for the first ref frame
-  // {2, 3} -> (vertical, horizontal) filter types for the second ref frame
-  InterpFilter best_filter[4] = {
-    SWITCHABLE, SWITCHABLE, SWITCHABLE, SWITCHABLE,
-  };
-#else
-  InterpFilter best_filter = SWITCHABLE;
-#endif
+  InterpFilter assign_filter = SWITCHABLE;
 
   int skip_txfm_sb = 0;
   int64_t skip_sse_sb = INT64_MAX;
@@ -6966,6 +6948,10 @@
   // one for future predictions. In the end, copy from tmp_buf to
   // dst if necessary.
   for (i = 0; i < MAX_MB_PLANE; i++) {
+    tmp_dst[i] = tmp_buf + i * MAX_SB_SQUARE;
+    tmp_dst_stride[i] = MAX_SB_SIZE;
+  }
+  for (i = 0; i < MAX_MB_PLANE; i++) {
     orig_dst[i] = xd->plane[i].dst.buf;
     orig_dst_stride[i] = xd->plane[i].dst.stride;
   }
@@ -7003,135 +6989,125 @@
       )
     return INT64_MAX;
 
-  pred_exists = 0;
-  // Are all MVs integer pel for Y and UV
-  intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
-  if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
-
+  if (cm->interp_filter == SWITCHABLE) {
 #if !CONFIG_DUAL_FILTER
-  best_filter =
-      predict_interp_filter(cpi, x, bsize, mi_row, mi_col, single_filter);
+    assign_filter =
+        predict_interp_filter(cpi, x, bsize, mi_row, mi_col, single_filter);
 #endif
+#if CONFIG_EXT_INTERP || CONFIG_DUAL_FILTER
+    if (!av1_is_interp_needed(xd)) assign_filter = EIGHTTAP_REGULAR;
+#endif
+  } else {
+    assign_filter = cm->interp_filter;
+  }
 
-  if (cm->interp_filter != BILINEAR) {
-    int newbest;
-    int tmp_rate_sum = 0;
-    int64_t tmp_dist_sum = 0;
-
+  {  // Do the interpolation filter search inside this braced scope
+    int tmp_rate;
+    int64_t tmp_dist;
 #if CONFIG_DUAL_FILTER
-    for (i = 0; i < SWITCHABLE_FILTERS * SWITCHABLE_FILTERS; ++i)
+    mbmi->interp_filter[0] =
+        assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
+    mbmi->interp_filter[1] =
+        assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
+    mbmi->interp_filter[2] =
+        assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
+    mbmi->interp_filter[3] =
+        assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
 #else
-    for (i = 0; i < SWITCHABLE_FILTERS; ++i)
+    mbmi->interp_filter =
+        assign_filter == SWITCHABLE ? EIGHTTAP_REGULAR : assign_filter;
 #endif
-    {
-      int j;
-      int64_t rs_rd;
-      int tmp_skip_sb = 0;
-      int64_t tmp_skip_sse = INT64_MAX;
+    rs = av1_get_switchable_rate(cpi, xd);
+    av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+    model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+                    &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
+    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
 
+    if (assign_filter == SWITCHABLE) {
+      // do interp_filter search
+      if (av1_is_interp_needed(xd)) {
+        int best_in_temp = 0;
 #if CONFIG_DUAL_FILTER
-      mbmi->interp_filter[0] = filter_sets[i][0];
-      mbmi->interp_filter[1] = filter_sets[i][1];
-      mbmi->interp_filter[2] = filter_sets[i][0];
-      mbmi->interp_filter[3] = filter_sets[i][1];
+        InterpFilter best_filter[4];
+        av1_copy(best_filter, mbmi->interp_filter);
 #else
-      mbmi->interp_filter = i;
+        InterpFilter best_filter = mbmi->interp_filter;
 #endif
-      rs = av1_get_switchable_rate(cpi, xd);
-      rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
+        restore_dst_buf(xd, tmp_dst, tmp_dst_stride);
+#if CONFIG_DUAL_FILTER
+        // EIGHTTAP_REGULAR mode is calculated beforehand
+        for (i = 1; i < SWITCHABLE_FILTERS * SWITCHABLE_FILTERS; ++i)
+#else
+        // EIGHTTAP_REGULAR mode is calculated beforehand
+        for (i = 1; i < SWITCHABLE_FILTERS; ++i)
+#endif
+        {
+          int tmp_skip_sb = 0;
+          int64_t tmp_skip_sse = INT64_MAX;
+          int tmp_rs;
+#if CONFIG_DUAL_FILTER
+          mbmi->interp_filter[0] = filter_sets[i][0];
+          mbmi->interp_filter[1] = filter_sets[i][1];
+          mbmi->interp_filter[2] = filter_sets[i][0];
+          mbmi->interp_filter[3] = filter_sets[i][1];
+#else
+          mbmi->interp_filter = i;
+#endif
+          tmp_rs = av1_get_switchable_rate(cpi, xd);
+          av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+          model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
+                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
+          tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);
 
-      if (i > 0 && intpel_mv && IsInterpolatingFilter(i)) {
-        rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
-        if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
-      } else {
-        int rate_sum = 0;
-        int64_t dist_sum = 0;
-        if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) ||
-#if CONFIG_EXT_INTER
-            is_comp_interintra_pred ||
-#endif  // CONFIG_EXT_INTER
-            (cm->interp_filter != SWITCHABLE &&
-             (
+          if (tmp_rd < rd) {
+            rd = tmp_rd;
+            rs = av1_get_switchable_rate(cpi, xd);
 #if CONFIG_DUAL_FILTER
-                 cm->interp_filter == mbmi->interp_filter[0]
+            av1_copy(best_filter, mbmi->interp_filter);
 #else
-                 cm->interp_filter == mbmi->interp_filter
+            best_filter = mbmi->interp_filter;
 #endif
-                 || (i == 0 && intpel_mv && IsInterpolatingFilter(i))))) {
-          restore_dst_buf(xd, orig_dst, orig_dst_stride);
-        } else {
-          for (j = 0; j < MAX_MB_PLANE; j++) {
-            xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
-            xd->plane[j].dst.stride = MAX_SB_SIZE;
+            skip_txfm_sb = tmp_skip_sb;
+            skip_sse_sb = tmp_skip_sse;
+            best_in_temp = !best_in_temp;
+            if (best_in_temp) {
+              restore_dst_buf(xd, orig_dst, orig_dst_stride);
+            } else {
+              restore_dst_buf(xd, tmp_dst, tmp_dst_stride);
+            }
           }
         }
-        av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &rate_sum,
-                        &dist_sum, &tmp_skip_sb, &tmp_skip_sse);
-
-        rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
-        if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
-
-        if (i == 0 && intpel_mv && IsInterpolatingFilter(i)) {
-          tmp_rate_sum = rate_sum;
-          tmp_dist_sum = dist_sum;
+        if (best_in_temp) {
+          restore_dst_buf(xd, tmp_dst, tmp_dst_stride);
+        } else {
+          restore_dst_buf(xd, orig_dst, orig_dst_stride);
         }
-      }
-      newbest = i == 0 || rd < best_rd;
-
-      if (newbest) {
-        best_rd = rd;
 #if CONFIG_DUAL_FILTER
-        best_filter[0] = mbmi->interp_filter[0];
-        best_filter[1] = mbmi->interp_filter[1];
-        best_filter[2] = mbmi->interp_filter[2];
-        best_filter[3] = mbmi->interp_filter[3];
+        av1_copy(mbmi->interp_filter, best_filter);
 #else
-        best_filter = mbmi->interp_filter;
+        mbmi->interp_filter = best_filter;
 #endif
-        if (cm->interp_filter == SWITCHABLE && i &&
-            !(intpel_mv && IsInterpolatingFilter(i)))
-          best_needs_copy = !best_needs_copy;
-      }
-
-      if ((cm->interp_filter == SWITCHABLE && newbest) ||
-          (cm->interp_filter != SWITCHABLE &&
-#if CONFIG_DUAL_FILTER
-           cm->interp_filter == mbmi->interp_filter[0]
-#else
-           cm->interp_filter == mbmi->interp_filter
-#endif
-           )) {
-        pred_exists = 1;
-        tmp_rd = best_rd;
-
-        skip_txfm_sb = tmp_skip_sb;
-        skip_sse_sb = tmp_skip_sse;
       } else {
-        pred_exists = 0;
+#if !CONFIG_EXT_INTERP && !CONFIG_DUAL_FILTER
+        int tmp_rs;
+        InterpFilter best_filter = mbmi->interp_filter;
+        rs = av1_get_switchable_rate(cpi, xd);
+        for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
+          mbmi->interp_filter = i;
+          tmp_rs = av1_get_switchable_rate(cpi, xd);
+          if (tmp_rs < rs) {
+            rs = tmp_rs;
+            best_filter = i;
+          }
+        }
+        mbmi->interp_filter = best_filter;
+#else
+        assert(0);
+#endif
       }
     }
-    restore_dst_buf(xd, orig_dst, orig_dst_stride);
   }
 
-// Set the appropriate filter
-#if CONFIG_DUAL_FILTER
-  mbmi->interp_filter[0] =
-      cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[0];
-  mbmi->interp_filter[1] =
-      cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[1];
-  if (mbmi->ref_frame[1] > INTRA_FRAME) {
-    mbmi->interp_filter[2] =
-        cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[2];
-    mbmi->interp_filter[3] =
-        cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter[3];
-  }
-#else
-  mbmi->interp_filter =
-      cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter;
-#endif
-  rs = cm->interp_filter == SWITCHABLE ? av1_get_switchable_rate(cpi, xd) : 0;
-
 #if CONFIG_EXT_INTER
 #if CONFIG_MOTION_VAR
   best_bmc_mbmi = *mbmi;
@@ -7432,29 +7408,15 @@
     pred_exists = 0;
   }
 #endif  // CONFIG_EXT_INTERP
-#endif  // CONFIG_EXT_INTER
-
-  if (pred_exists) {
-    if (best_needs_copy) {
-      // again temporarily set the buffers to local memory to prevent a memcpy
-      for (i = 0; i < MAX_MB_PLANE; i++) {
-        xd->plane[i].dst.buf = tmp_buf + i * MAX_SB_SQUARE;
-        xd->plane[i].dst.stride = MAX_SB_SIZE;
-      }
-    }
-    rd = tmp_rd;
-  } else {
+  if (pred_exists == 0) {
     int tmp_rate;
     int64_t tmp_dist;
-
-    // Handles the special case when a filter that is not in the
-    // switchable list (ex. bilinear) is indicated at the frame level, or
-    // skip condition holds.
     av1_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
     model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                     &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
   }
+#endif  // CONFIG_EXT_INTER
 
 #if CONFIG_DUAL_FILTER
   if (!is_comp_pred) single_filter[this_mode][refs[0]] = mbmi->interp_filter[0];
@@ -8054,7 +8016,7 @@
     // (prediction granularity), so we account for it in the full rate,
     // not the tokenonly rate.
     rate_y -= cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
-                               [mbmi->tx_size];
+                               [tx_size_to_depth(mbmi->tx_size)];
   }
 
   rate2 += av1_cost_bit(cm->fc->filter_intra_probs[0],
@@ -8811,8 +8773,9 @@
         // tokenonly rate, but for intra blocks, tx_size is always coded
         // (prediction granularity), so we account for it in the full rate,
         // not the tokenonly rate.
-        rate_y -= cpi->tx_size_cost[max_tx_size - TX_8X8]
-                                   [get_tx_size_context(xd)][mbmi->tx_size];
+        rate_y -=
+            cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
+                             [tx_size_to_depth(mbmi->tx_size)];
       }
 #if CONFIG_EXT_INTRA
       if (is_directional_mode) {
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 43ed837..47cc02a 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -375,7 +375,7 @@
 
 static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
                              TX_SIZE tx_size) {
-  const int eob_max = num_4x4_blocks_txsize_lookup[tx_size] << 4;
+  const int eob_max = tx_size_2d[tx_size];
   return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
 }
 
@@ -574,15 +574,19 @@
   const int tx_col = blk_col >> (1 - pd->subsampling_x);
   TX_SIZE plane_tx_size;
 
-  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
-  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  int max_blocks_high = block_size_high[plane_bsize];
+  int max_blocks_wide = block_size_wide[plane_bsize];
 
   assert(tx_size < TX_SIZES);
 
   if (xd->mb_to_bottom_edge < 0)
-    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+    max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
   if (xd->mb_to_right_edge < 0)
-    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+    max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
+
+  // Scale to the transform block unit.
+  max_blocks_high >>= tx_size_wide_log2[0];
+  max_blocks_wide >>= tx_size_wide_log2[0];
 
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
 
@@ -600,20 +604,23 @@
     else if (dry_run == DRY_RUN_COSTCOEFFS)
       cost_coeffs_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
   } else {
-    int bsl = b_width_log2_lookup[bsize];
+    // Half the block size in transform block unit.
+    int bsl = block_size_wide[bsize] >> (tx_size_wide_log2[0] + 1);
     int i;
 
     assert(bsl > 0);
-    --bsl;
 
     for (i = 0; i < 4; ++i) {
-      const int offsetr = blk_row + ((i >> 1) << bsl);
-      const int offsetc = blk_col + ((i & 0x01) << bsl);
-      int step = num_4x4_blocks_txsize_lookup[tx_size - 1];
+      const int offsetr = blk_row + ((i >> 1) * bsl);
+      const int offsetc = blk_col + ((i & 0x01) * bsl);
+
+      // TODO(jingning): Fix this tx_size transition.
+      const TX_SIZE sub_txs = tx_size - 1;
+      int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
 
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
 
-      tokenize_vartx(td, t, dry_run, tx_size - 1, plane_bsize, offsetr, offsetc,
+      tokenize_vartx(td, t, dry_run, sub_txs, plane_bsize, offsetr, offsetc,
                      block + i * step, plane, arg);
     }
   }
@@ -649,16 +656,17 @@
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
     const struct macroblockd_plane *const pd = &xd->plane[plane];
     const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
-    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
-    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
+    const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
     const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
     const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
-    int bh = num_4x4_blocks_wide_lookup[txb_size];
+    int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0];
+    int bh = block_size_high[txb_size] >> tx_size_wide_log2[0];
     int idx, idy;
     int block = 0;
-    int step = num_4x4_blocks_txsize_lookup[max_tx_size];
+    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
     for (idy = 0; idy < mi_height; idy += bh) {
-      for (idx = 0; idx < mi_width; idx += bh) {
+      for (idx = 0; idx < mi_width; idx += bw) {
         tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
                        block, plane, &arg);
         block += step;
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index e4179ef..cb2fbd5 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -436,6 +436,15 @@
                                  &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
 #endif  // HAVE_AVX2 && !CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 
+#if HAVE_AVX2 && CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    AVX2, Trans32x32Test,
+    ::testing::Values(make_tuple(&aom_fdct32x32_avx2,
+                                 &aom_idct32x32_1024_add_sse2, 0, AOM_BITS_8),
+                      make_tuple(&aom_fdct32x32_rd_avx2,
+                                 &aom_idct32x32_1024_add_sse2, 1, AOM_BITS_8)));
+#endif  // HAVE_AVX2 && CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
 #if HAVE_MSA && !CONFIG_AOM_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
     MSA, Trans32x32Test,
diff --git a/test/ethread_test.cc b/test/ethread_test.cc
index 6b2f1ea..c72f16d 100644
--- a/test/ethread_test.cc
+++ b/test/ethread_test.cc
@@ -89,6 +89,7 @@
         encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
         encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
         encoder->Control(AOME_SET_ARNR_TYPE, 3);
+        encoder->Control(AV1E_SET_FRAME_PARALLEL_DECODING, 0);
       } else {
         encoder->Control(AOME_SET_ENABLEAUTOALTREF, 0);
         encoder->Control(AV1E_SET_AQ_MODE, 3);
@@ -180,5 +181,5 @@
 AV1_INSTANTIATE_TEST_CASE(AVxEncoderThreadTestLarge,
                           ::testing::Values(::libaom_test::kTwoPassGood,
                                             ::libaom_test::kOnePassGood),
-                          ::testing::Range(1, 3));
+                          ::testing::Range(0, 3));
 }  // namespace
diff --git a/test/fht32x32_test.cc b/test/fht32x32_test.cc
index 1f85761..8545b2c 100644
--- a/test/fht32x32_test.cc
+++ b/test/fht32x32_test.cc
@@ -90,12 +90,11 @@
   IhtFunc inv_txfm_;
 };
 
-// TODO(luoyi): Owing to the range check in DCT_DCT of av1_fht32x32_avx2, as
-// input is out of the range, we use aom_fdct32x32_avx2. However this function
-// does not support CONFIG_AOM_HIGHBITDEPTH. I need to fix the scaling/rounding
-// of av1_fht32x32_avx2 then add this test on CONFIG_AOM_HIGHBITDEPTH.
-#if !CONFIG_AOM_HIGHBITDEPTH
 TEST_P(AV1Trans32x32HT, CoeffCheck) { RunCoeffCheck(); }
+// TODO(luoyi): When CONFIG_AOM_HIGHBITDEPTH = 1, our AVX2 implementation of
+// av1_fht32x32 does not support tran_low_t (int32_t) intermediate results.
+// Therefore MemCheck cannot pass for tx_type = 1, 2, ..., 8 yet.
+#if !CONFIG_AOM_HIGHBITDEPTH
 TEST_P(AV1Trans32x32HT, MemCheck) { RunMemCheck(); }
 #endif