Enable 1:4/4:1 transform for 8x16 and 16x8 luma blocks

It gives a 0.1% gain on lowres and midres.

Change-Id: I555a492a68571c525713840d73aa5614fe80a87d
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 676b471..826cc51 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -761,24 +761,24 @@
 
 static INLINE TxSetType get_ext_tx_set_type(TX_SIZE tx_size, BLOCK_SIZE bs,
                                             int is_inter, int use_reduced_set) {
-  const TX_SIZE tx_size2 = txsize_sqr_up_map[tx_size];
-  tx_size = txsize_sqr_map[tx_size];
+  const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
+  const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
 #if CONFIG_CB4X4 && USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
   (void)bs;
-  if (tx_size > TX_32X32) return EXT_TX_SET_DCTONLY;
+  if (tx_size_sqr > TX_32X32) return EXT_TX_SET_DCTONLY;
 #else
-  if (tx_size > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
+  if (tx_size_sqr > TX_32X32 || bs < BLOCK_8X8) return EXT_TX_SET_DCTONLY;
 #endif
   if (use_reduced_set)
     return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
-  if (tx_size2 == TX_32X32)
+  if (tx_size_sqr_up == TX_32X32)
     return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DCTONLY;
   if (is_inter)
-    return (tx_size == TX_16X16 ? EXT_TX_SET_DTT9_IDTX_1DDCT
-                                : EXT_TX_SET_ALL16);
+    return (tx_size_sqr == TX_16X16 ? EXT_TX_SET_DTT9_IDTX_1DDCT
+                                    : EXT_TX_SET_ALL16);
   else
-    return (tx_size == TX_16X16 ? EXT_TX_SET_DTT4_IDTX
-                                : EXT_TX_SET_DTT4_IDTX_1DDCT);
+    return (tx_size_sqr == TX_16X16 ? EXT_TX_SET_DTT4_IDTX
+                                    : EXT_TX_SET_DTT4_IDTX_1DDCT);
 }
 
 static INLINE int get_ext_tx_set(TX_SIZE tx_size, BLOCK_SIZE bs, int is_inter,
@@ -887,6 +887,45 @@
   return is_rect_tx_allowed_bsize(mbmi->sb_type) &&
          !xd->lossless[mbmi->segment_id];
 }
+
+#if CONFIG_RECT_TX_EXT
+static INLINE int is_quarter_tx_allowed_bsize(BLOCK_SIZE bsize) {
+  static const char LUT_QTTX[BLOCK_SIZES] = {
+#if CONFIG_CB4X4
+    0,  // BLOCK_2X2
+    0,  // BLOCK_2X4
+    0,  // BLOCK_4X2
+#endif
+    0,  // BLOCK_4X4
+    0,  // BLOCK_4X8
+    0,  // BLOCK_8X4
+    0,  // BLOCK_8X8
+    1,  // BLOCK_8X16
+    1,  // BLOCK_16X8
+    0,  // BLOCK_16X16
+    0,  // BLOCK_16X32
+    0,  // BLOCK_32X16
+    0,  // BLOCK_32X32
+    0,  // BLOCK_32X64
+    0,  // BLOCK_64X32
+    0,  // BLOCK_64X64
+#if CONFIG_EXT_PARTITION
+    0,  // BLOCK_64X128
+    0,  // BLOCK_128X64
+    0,  // BLOCK_128X128
+#endif  // CONFIG_EXT_PARTITION
+  };
+
+  return LUT_QTTX[bsize];
+}
+
+static INLINE int is_quarter_tx_allowed(const MACROBLOCKD *xd,
+                                        const MB_MODE_INFO *mbmi,
+                                        int is_inter) {
+  return is_quarter_tx_allowed_bsize(mbmi->sb_type) && is_inter &&
+         !xd->lossless[mbmi->segment_id];
+}
+#endif  // CONFIG_RECT_TX_EXT
 #endif  // CONFIG_RECT_TX
 #endif  // CONFIG_EXT_TX
 
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index e4a360b..79c130f 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -21,105 +21,117 @@
 #endif
 
 #if CONFIG_EXT_PARTITION
-#define IF_EXT_PARTITION(...) __VA_ARGS__
+#define IF_EXT_PARTITION(...) __VA_ARGS__,
 #else
 #define IF_EXT_PARTITION(...)
 #endif
 
 // Log 2 conversion lookup tables for block width and height
-static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = {
+static const uint8_t b_width_log2_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   0, 0, 0,
 #endif
-  0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)
+  0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5) 0, 2, 1, 3
 };
-static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = {
+static const uint8_t b_height_log2_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   0, 0, 0,
 #endif
-  0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)
+  0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5) 2, 0, 3, 1
 };
 // Log 2 conversion lookup tables for modeinfo width and height
-static const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = {
+static const uint8_t mi_width_log2_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
-  0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5)
+  0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, IF_EXT_PARTITION(4, 5, 5) 0,
+  2, 1, 3
 #else
-  0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4)
+  0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, IF_EXT_PARTITION(3, 4, 4) 0, 1, 0, 2
 #endif
 };
-static const uint8_t mi_height_log2_lookup[BLOCK_SIZES] = {
+static const uint8_t mi_height_log2_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
-  0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5)
+  0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4, IF_EXT_PARTITION(5, 4, 5) 2,
+  0, 3, 1
 #else
-  0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4)
+  0, 0, 0, 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, IF_EXT_PARTITION(4, 3, 4) 1, 0, 2, 0
 #endif
 };
 
-static const uint8_t mi_size_wide[BLOCK_SIZES] = {
+/* clang-format off */
+static const uint8_t mi_size_wide[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
-  1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)
+  1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16,
+  IF_EXT_PARTITION(16, 32, 32)  1, 4, 2, 8
 #else
-  1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)
+  1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16) 1, 2, 1, 4
 #endif
 };
-static const uint8_t mi_size_high[BLOCK_SIZES] = {
+static const uint8_t mi_size_high[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
-  1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)
+  1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16,
+  IF_EXT_PARTITION(32, 16, 32)  4, 1, 8, 2
 #else
-  1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)
+  1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16) 2, 1, 4, 1
 #endif
 };
+/* clang-format on */
 
 // Width/height lookup tables in units of various block sizes
-static const uint8_t block_size_wide[BLOCK_SIZES] = {
+static const uint8_t block_size_wide[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
-  2, 2, 4,
+  2,  2,  4,
 #endif
-  4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, IF_EXT_PARTITION(64, 128, 128)
+  4,  4,  8,  8,  8,  16, 16,
+  16, 32, 32, 32, 64, 64, IF_EXT_PARTITION(64, 128, 128) 4,
+  16, 8,  32
 };
 
-static const uint8_t block_size_high[BLOCK_SIZES] = {
+static const uint8_t block_size_high[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
-  2, 4, 2,
+  2,  4,  2,
 #endif
-  4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64, IF_EXT_PARTITION(128, 64, 128)
+  4,  8,  4,  8,  16, 8,  16,
+  32, 16, 32, 64, 32, 64, IF_EXT_PARTITION(128, 64, 128) 16,
+  4,  32, 8
 };
 
-static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = {
+static const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   1, 1, 1,
 #endif
-  1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32)
+  1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, IF_EXT_PARTITION(16, 32, 32) 1,
+  4, 2, 8
 };
-static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = {
+static const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   1, 1, 1,
 #endif
-  1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32)
+  1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16, IF_EXT_PARTITION(32, 16, 32) 4,
+  1, 8, 2
 };
-static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = {
+static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   1, 1, 1,
 #endif
-  1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16)
+  1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, IF_EXT_PARTITION(8, 16, 16) 1, 2, 1, 4
 };
-static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = {
+static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   1, 1, 1,
 #endif
-  1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16)
+  1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, IF_EXT_PARTITION(16, 8, 16) 2, 1, 4, 1
 };
-static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES] = {
+static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   1, 1, 1,
 #endif
-  1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8)
+  1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, IF_EXT_PARTITION(4, 8, 8) 1, 1, 1, 2
 };
-static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = {
+static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   1, 1, 1,
 #endif
-  1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)
+  1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8) 1, 1, 2, 1
 };
 
 // AOMMIN(3, AOMMIN(b_width_log2(bsize), b_height_log2(bsize)))
@@ -130,11 +142,12 @@
   0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, IF_EXT_PARTITION(3, 3, 3)
 };
 
-static const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = {
+static const uint8_t num_pels_log2_lookup[BLOCK_SIZES_ALL] = {
 #if CONFIG_CB4X4
   2, 3, 3,
 #endif
-  4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14)
+  4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12, IF_EXT_PARTITION(13, 13, 14) 6,
+  6, 8, 8
 };
 
 /* clang-format off */
@@ -487,6 +500,29 @@
 #endif  // CONFIG_EXT_PARTITION
 #endif  // CONFIG_TX64X64
 };
+
+#if CONFIG_EXT_TX && CONFIG_RECT_TX_EXT
+static const TX_SIZE quarter_txsize_lookup[BLOCK_SIZES] = {
+#if CONFIG_CB4X4
+  //     2X2,        2X4,        4X2,
+  TX_INVALID, TX_INVALID, TX_INVALID,
+#endif
+  //                             4x4,
+                          TX_INVALID,
+  //     4x8,        8x4,        8x8,
+  TX_INVALID, TX_INVALID, TX_INVALID,
+  // 8x16, 16x8, 16x16,
+  TX_4X16, TX_16X4, TX_INVALID,
+  // 16x32, 32x16, 32x32,
+  TX_8X32, TX_32X8, TX_INVALID,
+  // 32x64, 64x32, 64x64
+  TX_INVALID, TX_INVALID, TX_INVALID,
+#if CONFIG_EXT_PARTITION
+  // 64x128, 128x64, 128x128
+  TX_INVALID, TX_INVALID, TX_INVALID,
+#endif
+};
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX_EXT
 #else
 #define max_txsize_rect_lookup max_txsize_lookup
 #endif  // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
@@ -769,18 +805,18 @@
   BLOCK_16X16,  // TX_16X16
   BLOCK_32X32,  // TX_32X32
 #if CONFIG_TX64X64
-  BLOCK_64X64,    // TX_64X64
-#endif            // CONFIG_TX64X64
-  BLOCK_4X8,      // TX_4X8
-  BLOCK_8X4,      // TX_8X4
-  BLOCK_8X16,     // TX_8X16
-  BLOCK_16X8,     // TX_16X8
-  BLOCK_16X32,    // TX_16X32
-  BLOCK_32X16,    // TX_32X16
-  BLOCK_INVALID,  // TX_4X16
-  BLOCK_INVALID,  // TX_16X4
-  BLOCK_INVALID,  // TX_8X32
-  BLOCK_INVALID,  // TX_32X8
+  BLOCK_64X64,  // TX_64X64
+#endif          // CONFIG_TX64X64
+  BLOCK_4X8,    // TX_4X8
+  BLOCK_8X4,    // TX_8X4
+  BLOCK_8X16,   // TX_8X16
+  BLOCK_16X8,   // TX_16X8
+  BLOCK_16X32,  // TX_16X32
+  BLOCK_32X16,  // TX_32X16
+  BLOCK_4X16,   // TX_4X16
+  BLOCK_16X4,   // TX_16X4
+  BLOCK_8X32,   // TX_8X32
+  BLOCK_32X8,   // TX_32X8
 };
 
 static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
@@ -1068,7 +1104,7 @@
       { { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
       { { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
       { { TX_8X16, TX_8X8 }, { TX_4X8, TX_4X8 } },
-      { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
+      { { TX_4X16, TX_4X8 }, { TX_4X16, TX_4X8 } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
@@ -1092,7 +1128,7 @@
       { { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
       { { TX_16X8, TX_8X4 }, { TX_8X8, TX_8X4 } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
-      { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
+      { { TX_16X4, TX_16X4 }, { TX_8X4, TX_8X4 } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
   },
@@ -1139,7 +1175,7 @@
       { { TX_16X32, TX_16X16 }, { TX_8X16, TX_8X16 } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
-      { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
+      { { TX_8X32, TX_8X16 }, { TX_4X16, TX_4X16 } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
   },
   {
@@ -1163,7 +1199,7 @@
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
       { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
-      { { TX_INVALID, TX_INVALID }, { TX_INVALID, TX_INVALID } },
+      { { TX_32X8, TX_32X8 }, { TX_16X8, TX_16X4 } },
   },
   {
 // BLOCK_32X32
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index 244d313..793db03 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -323,6 +323,24 @@
                    *(const uint64_t *)(l + 16) | *(const uint64_t *)(l + 24));
       break;
 #endif  // CONFIG_TX64X64
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    case TX_4X16:
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = !!*(const uint64_t *)l;
+      break;
+    case TX_16X4:
+      above_ec = !!*(const uint64_t *)a;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+    case TX_8X32:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
+      break;
+    case TX_32X8:
+      above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
+      left_ec = !!*(const uint32_t *)l;
+      break;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
     default: assert(0 && "Invalid transform size."); break;
   }
   return combine_entropy_contexts(above_ec, left_ec);
@@ -375,6 +393,24 @@
       left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
       break;
 #endif  // CONFIG_TX64X64
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    case TX_4X16:
+      above_ec = a[0] != 0;
+      left_ec = !!*(const uint32_t *)l;
+      break;
+    case TX_16X4:
+      above_ec = !!*(const uint32_t *)a;
+      left_ec = l[0] != 0;
+      break;
+    case TX_8X32:
+      above_ec = !!*(const uint16_t *)a;
+      left_ec = !!*(const uint64_t *)l;
+      break;
+    case TX_32X8:
+      above_ec = !!*(const uint64_t *)a;
+      left_ec = !!*(const uint16_t *)l;
+      break;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
     default: assert(0 && "Invalid transform size."); break;
   }
   return combine_entropy_contexts(above_ec, left_ec);
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index b3894f7..6da7e67 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1663,6 +1663,10 @@
 #endif  // CONFIG_TX64X64
                                           };
 
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+static const aom_prob default_quarter_tx_size_prob = 192;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+
 #if CONFIG_LOOP_RESTORATION
 const aom_tree_index
     av1_switchable_restore_tree[TREE_SIZE(RESTORE_SWITCHABLE_TYPES)] = {
@@ -4337,6 +4341,9 @@
   av1_copy(fc->comp_inter_mode_prob, default_comp_inter_mode_p);
 #endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
   av1_copy(fc->tx_size_probs, default_tx_size_prob);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+  fc->quarter_tx_size_prob = default_quarter_tx_size_prob;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
 #if CONFIG_VAR_TX
   av1_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
 #endif
@@ -4669,6 +4676,10 @@
         aom_tree_merge_probs(av1_tx_size_tree[i], pre_fc->tx_size_probs[i][j],
                              counts->tx_size[i][j], fc->tx_size_probs[i][j]);
     }
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    fc->quarter_tx_size_prob = av1_mode_mv_merge_probs(
+        pre_fc->quarter_tx_size_prob, counts->quarter_tx_size);
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
   }
 
 #if CONFIG_VAR_TX
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 8e58d90..e26efe4 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -222,6 +222,9 @@
   aom_prob comp_inter_mode_prob[COMP_INTER_MODE_CONTEXTS];
 #endif  // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
   aom_prob tx_size_probs[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][MAX_TX_DEPTH];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+  aom_prob quarter_tx_size_prob;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
 #if CONFIG_VAR_TX
   aom_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
 #endif
@@ -399,6 +402,9 @@
   // belong into this structure.
   unsigned int tx_size_totals[TX_SIZES];
   unsigned int tx_size[MAX_TX_DEPTH][TX_SIZE_CONTEXTS][TX_SIZES];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+  unsigned int quarter_tx_size[2];
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
 #if CONFIG_VAR_TX
   unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
 #endif
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 118cf6a..4f53384 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -111,8 +111,13 @@
   BLOCK_128X64,
   BLOCK_128X128,
 #endif  // CONFIG_EXT_PARTITION
-  BLOCK_SIZES,
-  BLOCK_INVALID = BLOCK_SIZES,
+  BLOCK_4X16,
+  BLOCK_16X4,
+  BLOCK_8X32,
+  BLOCK_32X8,
+  BLOCK_SIZES_ALL,
+  BLOCK_SIZES = BLOCK_4X16,
+  BLOCK_INVALID = 255,
   BLOCK_LARGEST = (BLOCK_SIZES - 1)
 } BLOCK_SIZE;
 
diff --git a/av1/common/idct.c b/av1/common/idct.c
index c3d870c..5275df5 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -1377,7 +1377,7 @@
 }
 
 // These will be used by the masked-tx experiment in the future.
-#if CONFIG_MASKED_TX && 0
+#if CONFIG_RECT_TX && CONFIG_EXT_TX && CONFIG_RECT_TX_EXT
 static void inv_txfm_add_4x16(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, TX_TYPE tx_type) {
   (void)eob;
@@ -1401,7 +1401,7 @@
   (void)eob;
   av1_iht32x8_256_add(input, dest, stride, tx_type);
 }
-#endif  // CONFIG_MASKED_TX
+#endif
 
 static void inv_txfm_add_8x16(const tran_low_t *input, uint8_t *dest,
                               int stride, int eob, TX_TYPE tx_type) {
@@ -2799,6 +2799,12 @@
       inv_txfm_add_2x2(input, dest, stride, eob, tx_type, lossless);
       break;
 #endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    case TX_32X8: inv_txfm_add_32x8(input, dest, stride, eob, tx_type); break;
+    case TX_8X32: inv_txfm_add_8x32(input, dest, stride, eob, tx_type); break;
+    case TX_16X4: inv_txfm_add_16x4(input, dest, stride, eob, tx_type); break;
+    case TX_4X16: inv_txfm_add_4x16(input, dest, stride, eob, tx_type); break;
+#endif
     default: assert(0 && "Invalid transform size"); break;
   }
 }
diff --git a/av1/common/scan.c b/av1/common/scan.c
index 585b319..a3c7c09 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -73,17 +73,17 @@
 };
 
 DECLARE_ALIGNED(16, static const int16_t, default_scan_4x16[64]) = {
-  0,  1,  4,  2,  5,  8,  3,  6,  9,  12, 7,  10, 13, 16, 11, 14,
-  17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 32, 27, 30,
-  33, 36, 31, 34, 37, 40, 35, 38, 41, 44, 39, 42, 45, 48, 43, 46,
-  49, 52, 47, 50, 53, 56, 51, 54, 57, 60, 55, 58, 61, 59, 62, 63,
+  0,  1,  4,  5,  2,  8,  6,  9,  10, 3,  12, 7,  13, 11, 14, 16,
+  17, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 };
 
 DECLARE_ALIGNED(16, static const int16_t, default_scan_16x4[64]) = {
-  0,  1,  16, 2,  17, 32, 3,  18, 33, 48, 4,  19, 34, 49, 5,  20,
-  35, 50, 6,  21, 36, 51, 7,  22, 37, 52, 8,  23, 38, 53, 9,  24,
-  39, 54, 10, 25, 40, 55, 11, 26, 41, 56, 12, 27, 42, 57, 13, 28,
-  43, 58, 14, 29, 44, 59, 15, 30, 45, 60, 31, 46, 61, 47, 62, 63,
+  0,  1,  16, 17, 2,  32, 18, 33, 34, 3,  48, 19, 49, 35, 50, 4,
+  20, 51, 36, 5,  52, 21, 37, 53, 6,  22, 38, 54, 7,  23, 39, 55,
+  8,  24, 40, 56, 9,  25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59,
+  12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63,
 };
 
 #if CONFIG_EXT_TX
@@ -1582,24 +1582,24 @@
 
 DECLARE_ALIGNED(16, static const int16_t,
                 default_scan_4x16_neighbors[65 * MAX_NEIGHBORS]) = {
-  0,  0,  0,  0,  0,  0,  1,  1,  1,  4,  4,  4,  2,  2,  2,  5,  5,  8,  8,
-  8,  3,  6,  6,  9,  9,  12, 12, 12, 7,  10, 10, 13, 13, 16, 16, 16, 11, 14,
-  14, 17, 17, 20, 20, 20, 15, 18, 18, 21, 21, 24, 24, 24, 19, 22, 22, 25, 25,
-  28, 28, 28, 23, 26, 26, 29, 29, 32, 32, 32, 27, 30, 30, 33, 33, 36, 36, 36,
-  31, 34, 34, 37, 37, 40, 40, 40, 35, 38, 38, 41, 41, 44, 44, 44, 39, 42, 42,
-  45, 45, 48, 48, 48, 43, 46, 46, 49, 49, 52, 52, 52, 47, 50, 50, 53, 53, 56,
-  56, 56, 51, 54, 54, 57, 57, 60, 55, 58, 58, 61, 59, 62, 0,  0
+  0,  0,  0,  0,  0,  0,  1,  4,  1,  1,  4,  4,  2,  5,  5,  8,  6,  9,  2,
+  2,  8,  8,  3,  6,  9,  12, 7,  10, 10, 13, 12, 12, 13, 16, 11, 14, 14, 17,
+  15, 18, 16, 16, 17, 20, 18, 21, 19, 22, 20, 20, 21, 24, 22, 25, 23, 26, 24,
+  24, 25, 28, 26, 29, 27, 30, 28, 28, 29, 32, 30, 33, 31, 34, 32, 32, 33, 36,
+  34, 37, 35, 38, 36, 36, 37, 40, 38, 41, 39, 42, 40, 40, 41, 44, 42, 45, 43,
+  46, 44, 44, 45, 48, 46, 49, 47, 50, 48, 48, 49, 52, 50, 53, 51, 54, 52, 52,
+  53, 56, 54, 57, 55, 58, 56, 56, 57, 60, 58, 61, 59, 62, 0,  0
 };
 
 DECLARE_ALIGNED(16, static const int16_t,
                 default_scan_16x4_neighbors[65 * MAX_NEIGHBORS]) = {
-  0,  0,  0,  0,  0,  0,  1,  1,  1,  16, 16, 16, 2,  2,  2,  17, 17, 32, 32,
-  32, 3,  3,  3,  18, 18, 33, 33, 48, 4,  4,  4,  19, 19, 34, 34, 49, 5,  5,
-  5,  20, 20, 35, 35, 50, 6,  6,  6,  21, 21, 36, 36, 51, 7,  7,  7,  22, 22,
-  37, 37, 52, 8,  8,  8,  23, 23, 38, 38, 53, 9,  9,  9,  24, 24, 39, 39, 54,
-  10, 10, 10, 25, 25, 40, 40, 55, 11, 11, 11, 26, 26, 41, 41, 56, 12, 12, 12,
-  27, 27, 42, 42, 57, 13, 13, 13, 28, 28, 43, 43, 58, 14, 14, 14, 29, 29, 44,
-  44, 59, 15, 30, 30, 45, 45, 60, 31, 46, 46, 61, 47, 62, 0,  0
+  0,  0,  0,  0,  0,  0,  1,  16, 1,  1,  16, 16, 2,  17, 17, 32, 18, 33, 2,
+  2,  32, 32, 3,  18, 33, 48, 19, 34, 34, 49, 3,  3,  4,  19, 35, 50, 20, 35,
+  4,  4,  36, 51, 5,  20, 21, 36, 37, 52, 5,  5,  6,  21, 22, 37, 38, 53, 6,
+  6,  7,  22, 23, 38, 39, 54, 7,  7,  8,  23, 24, 39, 40, 55, 8,  8,  9,  24,
+  25, 40, 41, 56, 9,  9,  10, 25, 26, 41, 42, 57, 10, 10, 11, 26, 27, 42, 43,
+  58, 11, 11, 12, 27, 28, 43, 44, 59, 12, 12, 13, 28, 29, 44, 45, 60, 13, 13,
+  14, 29, 30, 45, 46, 61, 14, 14, 15, 30, 31, 46, 47, 62, 0,  0
 };
 
 #if CONFIG_EXT_TX
@@ -4307,17 +4307,17 @@
 };
 
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x16[64]) = {
-  0,  1,  3,  6,  2,  4,  7,  10, 5,  8,  11, 14, 9,  12, 15, 18,
-  13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 30, 25, 28, 31, 34,
-  29, 32, 35, 38, 33, 36, 39, 42, 37, 40, 43, 46, 41, 44, 47, 50,
-  45, 48, 51, 54, 49, 52, 55, 58, 53, 56, 59, 61, 57, 60, 62, 63,
+  0,  1,  4,  9,  2,  3,  6,  11, 5,  7,  8,  13, 10, 12, 14, 17,
+  15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 };
 
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x4[64]) = {
-  0, 1,  3,  6,  10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54,
-  2, 4,  7,  11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 58,
-  5, 8,  12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 59, 61,
-  9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 60, 62, 63,
+  0,  1,  4,  9,  15, 19, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
+  2,  3,  6,  11, 16, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61,
+  5,  7,  8,  13, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62,
+  10, 12, 14, 17, 20, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63,
 };
 
 #if CONFIG_EXT_TX
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 41260d5..4619922 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -4518,6 +4518,10 @@
 #if !CONFIG_EC_ADAPT
   if (cm->tx_mode == TX_MODE_SELECT) read_tx_size_probs(fc, &r);
 #endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+  if (cm->tx_mode == TX_MODE_SELECT)
+    av1_diff_update_prob(&r, &fc->quarter_tx_size_prob, ACCT_STR);
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
 
 #if CONFIG_LV_MAP
   av1_read_txb_probs(fc, cm->tx_mode, &r);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index e551b93..6ddd5fa 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -510,6 +510,17 @@
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
       if (coded_tx_size > max_txsize_lookup[bsize]) {
         assert(coded_tx_size == max_txsize_lookup[bsize] + 1);
+#if CONFIG_RECT_TX_EXT
+        if (is_quarter_tx_allowed(xd, &xd->mi[0]->mbmi, is_inter)) {
+          int quarter_tx = aom_read(r, cm->fc->quarter_tx_size_prob, ACCT_STR);
+          FRAME_COUNTS *counts = xd->counts;
+
+          if (counts) ++counts->quarter_tx_size[quarter_tx];
+          return quarter_tx ? quarter_txsize_lookup[bsize]
+                            : max_txsize_rect_lookup[bsize];
+        }
+#endif  // CONFIG_RECT_TX_EXT
+
         return max_txsize_rect_lookup[bsize];
       }
 #else
@@ -1077,9 +1088,9 @@
   }
 #endif
 
-  mbmi->tx_size = read_tx_size(cm, xd, 0, 1, r);
   mbmi->ref_frame[0] = INTRA_FRAME;
   mbmi->ref_frame[1] = NONE_FRAME;
+  mbmi->tx_size = read_tx_size(cm, xd, 0, 1, r);
 
 #if CONFIG_INTRABC
   if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools) {
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 5d723e8..4da7ba4 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -463,8 +463,6 @@
     const int depth = tx_size_to_depth(coded_tx_size);
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
     assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
-    assert(
-        IMPLIES(is_rect_tx(tx_size), tx_size == max_txsize_rect_lookup[bsize]));
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 
 #if CONFIG_EC_MULTISYMBOL
@@ -475,6 +473,11 @@
                     ec_ctx->tx_size_probs[tx_size_cat][tx_size_ctx],
                     &tx_size_encodings[tx_size_cat][depth]);
 #endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
+      aom_write(w, tx_size == quarter_txsize_lookup[bsize],
+                cm->fc->quarter_tx_size_prob);
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
   }
 }
 
@@ -4709,6 +4712,11 @@
 #if !CONFIG_EC_ADAPT
   update_txfm_probs(cm, header_bc, counts);
 #endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+  if (cm->tx_mode == TX_MODE_SELECT)
+    av1_cond_prob_diff_update(header_bc, &cm->fc->quarter_tx_size_prob,
+                              cm->counts.quarter_tx_size, probwt);
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
 #if CONFIG_LV_MAP
   av1_write_txb_probs(cpi, header_bc);
 #else
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index a2d5d22..8a37ca3 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5525,7 +5525,11 @@
     if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0)
       cm->tx_mode = ALLOW_32X32 + CONFIG_TX64X64;
 #else
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    if (cm->tx_mode == TX_MODE_SELECT && counts->quarter_tx_size[1] == 0) {
+#else
     if (cm->tx_mode == TX_MODE_SELECT) {
+#endif
 #if CONFIG_TX64X64
       int count4x4 = 0;
       int count8x8_8x8p = 0, count8x8_lp = 0;
@@ -6068,6 +6072,13 @@
 
       ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth];
 #endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+      if (is_quarter_tx_allowed(xd, mbmi, is_inter) &&
+          mbmi->tx_size != txsize_sqr_up_map[mbmi->tx_size]) {
+        ++td->counts->quarter_tx_size[mbmi->tx_size ==
+                                      quarter_txsize_lookup[mbmi->sb_type]];
+      }
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
 #if CONFIG_EXT_TX && CONFIG_RECT_TX
       assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi)));
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c
index 4fd5631..2432938 100644
--- a/av1/encoder/hybrid_fwd_txfm.c
+++ b/av1/encoder/hybrid_fwd_txfm.c
@@ -132,6 +132,36 @@
 }
 #endif  // CONFIG_TX64X64
 
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+static void fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff,
+                          int diff_stride, TX_TYPE tx_type,
+                          FWD_TXFM_OPT fwd_txfm_opt) {
+  (void)fwd_txfm_opt;
+  av1_fht16x4(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff,
+                          int diff_stride, TX_TYPE tx_type,
+                          FWD_TXFM_OPT fwd_txfm_opt) {
+  (void)fwd_txfm_opt;
+  av1_fht4x16(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff,
+                          int diff_stride, TX_TYPE tx_type,
+                          FWD_TXFM_OPT fwd_txfm_opt) {
+  (void)fwd_txfm_opt;
+  av1_fht32x8(src_diff, coeff, diff_stride, tx_type);
+}
+
+static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
+                          int diff_stride, TX_TYPE tx_type,
+                          FWD_TXFM_OPT fwd_txfm_opt) {
+  (void)fwd_txfm_opt;
+  av1_fht8x32(src_diff, coeff, diff_stride, tx_type);
+}
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+
 #if CONFIG_HIGHBITDEPTH
 #if CONFIG_CB4X4
 static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
@@ -430,6 +460,20 @@
       fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless);
       break;
 #endif
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    case TX_4X16:
+      fwd_txfm_4x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+      break;
+    case TX_16X4:
+      fwd_txfm_16x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+      break;
+    case TX_8X32:
+      fwd_txfm_8x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+      break;
+    case TX_32X8:
+      fwd_txfm_32x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
+      break;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
     default: assert(0); break;
   }
 }
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index e097392..b20562f 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -647,6 +647,34 @@
       for (i = 0; i < num_4x4_h; i += 8)
         t_left[i] = !!*(const uint64_t *)&left[i];
       break;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    case TX_4X16:
+      for (i = 0; i < num_4x4_w; i += 2)
+        t_above[i] = !!*(const uint16_t *)&above[i];
+      for (i = 0; i < num_4x4_h; i += 8)
+        t_left[i] = !!*(const uint64_t *)&left[i];
+      break;
+    case TX_16X4:
+      for (i = 0; i < num_4x4_w; i += 8)
+        t_above[i] = !!*(const uint64_t *)&above[i];
+      for (i = 0; i < num_4x4_h; i += 2)
+        t_left[i] = !!*(const uint16_t *)&left[i];
+      break;
+    case TX_8X32:
+      for (i = 0; i < num_4x4_w; i += 4)
+        t_above[i] = !!*(const uint32_t *)&above[i];
+      for (i = 0; i < num_4x4_h; i += 16)
+        t_left[i] =
+            !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
+      break;
+    case TX_32X8:
+      for (i = 0; i < num_4x4_w; i += 16)
+        t_above[i] =
+            !!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8]);
+      for (i = 0; i < num_4x4_h; i += 4)
+        t_left[i] = !!*(const uint32_t *)&left[i];
+      break;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
 
     default: assert(0 && "Invalid transform size."); break;
   }
@@ -720,6 +748,30 @@
       for (i = 0; i < num_4x4_h; i += 4)
         t_left[i] = !!*(const uint32_t *)&left[i];
       break;
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    case TX_4X16:
+      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
+      for (i = 0; i < num_4x4_h; i += 4)
+        t_left[i] = !!*(const uint32_t *)&left[i];
+      break;
+    case TX_16X4:
+      for (i = 0; i < num_4x4_w; i += 4)
+        t_above[i] = !!*(const uint32_t *)&above[i];
+      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
+      break;
+    case TX_8X32:
+      for (i = 0; i < num_4x4_w; i += 2)
+        t_above[i] = !!*(const uint16_t *)&above[i];
+      for (i = 0; i < num_4x4_h; i += 8)
+        t_left[i] = !!*(const uint64_t *)&left[i];
+      break;
+    case TX_32X8:
+      for (i = 0; i < num_4x4_w; i += 8)
+        t_above[i] = !!*(const uint64_t *)&above[i];
+      for (i = 0; i < num_4x4_h; i += 2)
+        t_left[i] = !!*(const uint16_t *)&left[i];
+      break;
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
     default: assert(0 && "Invalid transform size."); break;
   }
 }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 63e594a..af0a9ce 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1234,7 +1234,9 @@
                                BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
                                BLOCK_SIZE tx_bsize, int *width, int *height,
                                int *visible_width, int *visible_height) {
+#if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
   assert(tx_bsize <= plane_bsize);
+#endif  // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
   int txb_height = block_size_high[tx_bsize];
   int txb_width = block_size_wide[tx_bsize];
   const int block_height = block_size_high[plane_bsize];
@@ -1270,7 +1272,12 @@
                      &txb_cols, &txb_rows, &visible_cols, &visible_rows);
   assert(visible_rows > 0);
   assert(visible_cols > 0);
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+  if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
+      tx_bsize < BLOCK_SIZES) {
+#else
   if (txb_rows == visible_rows && txb_cols == visible_cols) {
+#endif
     unsigned sse;
     cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
     return sse;
@@ -1795,7 +1802,12 @@
     const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
     const int depth = tx_size_to_depth(coded_tx_size);
     const int tx_size_ctx = get_tx_size_context(xd);
-    const int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
+    int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
+#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
+    if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
+      r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
+                                tx_size == quarter_txsize_lookup[bsize]);
+#endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
     return r_tx_size;
   } else {
     return 0;
@@ -2198,6 +2210,56 @@
 #endif  // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
     }
   }
+
+#if CONFIG_RECT_TX_EXT
+  // Test the 1:4 / 4:1 (quarter-size) transform for this block size.
+  int evaluate_quarter_tx = 0;
+  if (is_quarter_tx_allowed(xd, mbmi, is_inter)) {
+    if (tx_select) {
+      evaluate_quarter_tx = 1;
+    } else {
+      const TX_SIZE chosen_tx_size =
+          tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
+      evaluate_quarter_tx = chosen_tx_size == quarter_txsize_lookup[bs];
+    }
+  }
+  if (evaluate_quarter_tx) {
+    TX_TYPE tx_start = DCT_DCT;
+    TX_TYPE tx_end = TX_TYPES;
+#if CONFIG_TXK_SEL
+    // tx_type is only a placeholder when lv_map is on; the actual tx_type
+    // search is performed in av1_search_txk_type().
+    tx_end = DCT_DCT + 1;
+#endif
+    TX_TYPE tx_type;
+    for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
+      if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
+      const TX_SIZE tx_size = quarter_txsize_lookup[bs];
+      RD_STATS this_rd_stats;
+      int ext_tx_set =
+          get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
+      if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
+          (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
+        rd =
+            txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, tx_size);
+        if (rd < best_rd) {
+#if CONFIG_TXK_SEL
+          memcpy(best_txk_type, mbmi->txk_type,
+                 sizeof(best_txk_type[0]) * num_blk);
+#endif
+          best_tx_type = tx_type;
+          best_tx_size = tx_size;
+          best_rd = rd;
+          *rd_stats = this_rd_stats;
+        }
+      }
+#if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
+      const int is_inter = is_inter_block(mbmi);
+      if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
+#endif  // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
+    }
+  }
+#endif  // CONFIG_RECT_TX_EXT
 #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
 
   if (tx_select) {
@@ -3442,12 +3504,14 @@
   },
 };
 
+/* clang-format off */
 static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
   0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
 #if CONFIG_ALT_INTRA
   0,
 #endif  // CONFIG_ALT_INTRA
 };
+/* clang-format on */
 
 static void angle_estimation(const uint8_t *src, int src_stride, int rows,
                              int cols, BLOCK_SIZE bsize,