Add 32x128/128x32 block sizes

Change-Id: Ieb28f40d85e4db4af33648c32c406dd2931ceb89
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 0b5e561..b71011d 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2853,13 +2853,14 @@
 
   if (has_rows && has_cols) {
 #if CONFIG_EXT_PARTITION_TYPES
-    const int bsl =
-        mi_width_log2_lookup[bsize] - mi_width_log2_lookup[BLOCK_8X8];
-    aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx],
-                     av1_num_partition_types[bsl]);
+    const int num_partition_types =
+        (mi_width_log2_lookup[bsize] > mi_width_log2_lookup[BLOCK_8X8])
+            ? EXT_PARTITION_TYPES
+            : PARTITION_TYPES;
 #else
-    aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], PARTITION_TYPES);
-#endif  // CONFIG_EXT_PARTITION_TYPES
+    const int num_partition_types = PARTITION_TYPES;
+#endif  // CONFIG_EXT_PARTITION_TYPES
+    aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], num_partition_types);
   } else if (!has_rows && has_cols) {
     assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
     assert(bsize > BLOCK_8X8);
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index c4afad5..ef72bf4 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2933,7 +2933,10 @@
   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,  // 64x128, 128x64, 128x128
 #endif  // CONFIG_EXT_PARTITION
   BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,    //   4x16,   16x4,    8x32
-  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16   //   32x8,   16x64,  64x16
+  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16,  //   32x8,  16x64,   64x16
+#if CONFIG_EXT_PARTITION
+  BLOCK_16X16, BLOCK_16X16                // 32x128, 128x32
+#endif  // CONFIG_EXT_PARTITION
 };
 
 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = {
@@ -2949,7 +2952,10 @@
   BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST,  // 64x128, 128x64, 128x128
 #endif  // CONFIG_EXT_PARTITION
   BLOCK_16X16,   BLOCK_16X16,   BLOCK_32X32,    //   4x16,   16x4,    8x32
-  BLOCK_32X32,   BLOCK_LARGEST, BLOCK_LARGEST   //   32x8,  16x64,   64x16
+  BLOCK_32X32,   BLOCK_LARGEST, BLOCK_LARGEST,  //   32x8,  16x64,   64x16
+#if CONFIG_EXT_PARTITION
+  BLOCK_LARGEST, BLOCK_LARGEST                  // 32x128, 128x32
+#endif  // CONFIG_EXT_PARTITION
 };
 
 // Next square block size less or equal than current block size.
@@ -2966,7 +2972,10 @@
   BLOCK_64X64, BLOCK_64X64, BLOCK_128X128,  // 64x128, 128x64, 128x128
 #endif  // CONFIG_EXT_PARTITION
   BLOCK_4X4,   BLOCK_4X4,   BLOCK_8X8,      //   4x16,   16x4,    8x32
-  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16     //   32x8,  16x64,   64x16
+  BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16,    //   32x8,  16x64,   64x16
+#if CONFIG_EXT_PARTITION
+  BLOCK_32X32, BLOCK_32X32                  // 32x128, 128x32
+#endif  // CONFIG_EXT_PARTITION
 };
 /* clang-format on */
 
@@ -4347,13 +4356,20 @@
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }
 
+#if CONFIG_EXT_PARTITION
+  const int can_partition_4 = (bsize == BLOCK_128X128 || bsize == BLOCK_64X64 ||
+                               bsize == BLOCK_32X32 || bsize == BLOCK_16X16);
+#else
+  const int can_partition_4 =
+      (bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16);
+#endif  // CONFIG_EXT_PARTITION
+
   // PARTITION_HORZ_4
   // TODO(david.barker): For this and PARTITION_VERT_4,
   // * Add support for BLOCK_16X16 once we support 2x8 and 8x2 blocks for the
   //   chroma plane
   // * Add support for supertx
-  if ((bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16) &&
-      partition_horz_allowed && !force_horz_split &&
+  if (can_partition_4 && partition_horz_allowed && !force_horz_split &&
       (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
     const int quarter_step = mi_size_high[bsize] / 4;
     PICK_MODE_CONTEXT *ctx_prev = ctx_none;
@@ -4390,8 +4406,7 @@
 #endif
   }
   // PARTITION_VERT_4
-  if ((bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16) &&
-      partition_vert_allowed && !force_vert_split &&
+  if (can_partition_4 && partition_vert_allowed && !force_vert_split &&
       (do_rectangular_split || av1_active_v_edge(cpi, mi_row, mi_step))) {
     const int quarter_step = mi_size_wide[bsize] / 4;
     PICK_MODE_CONTEXT *ctx_prev = ctx_none;
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index d9e898a..866194c 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1368,7 +1368,15 @@
 MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x16)
 MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x16_avg)
 MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x16x4d)
-#endif
+#if CONFIG_EXT_PARTITION
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x128)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x128_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x128x4d)
+MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x32)
+MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x32_avg)
+MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x32x4d)
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_EXT_PARTITION_TYPES
 
 #if CONFIG_EXT_INTER
 #define HIGHBD_MBFP(BT, MCSDF, MCSVF) \
@@ -1426,7 +1434,11 @@
 MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x8)
 MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x64)
 MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x16)
-#endif
+#if CONFIG_EXT_PARTITION
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x128)
+MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x32)
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_EXT_PARTITION_TYPES
 #endif  // CONFIG_EXT_INTER
 
 #if CONFIG_MOTION_VAR
@@ -1478,7 +1490,11 @@
 MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x8)
 MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x64)
 MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x16)
-#endif
+#if CONFIG_EXT_PARTITION
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x128)
+MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad128x32)
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_EXT_PARTITION_TYPES
 #endif  // CONFIG_MOTION_VAR
 
 static void highbd_set_var_fns(AV1_COMP *const cpi) {
@@ -1487,6 +1503,20 @@
     switch (cm->bit_depth) {
       case AOM_BITS_8:
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_BFP(BLOCK_128X32, aom_highbd_sad128x32_bits8,
+                   aom_highbd_sad128x32_avg_bits8, aom_highbd_8_variance128x32,
+                   aom_highbd_8_sub_pixel_variance128x32,
+                   aom_highbd_8_sub_pixel_avg_variance128x32, NULL, NULL,
+                   aom_highbd_sad128x32x4d_bits8)
+
+        HIGHBD_BFP(BLOCK_32X128, aom_highbd_sad32x128_bits8,
+                   aom_highbd_sad32x128_avg_bits8, aom_highbd_8_variance32x128,
+                   aom_highbd_8_sub_pixel_variance32x128,
+                   aom_highbd_8_sub_pixel_avg_variance32x128, NULL, NULL,
+                   aom_highbd_sad32x128x4d_bits8)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits8,
                    aom_highbd_sad64x16_avg_bits8, aom_highbd_8_variance64x16,
                    aom_highbd_8_sub_pixel_variance64x16,
@@ -1672,6 +1702,14 @@
         HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8,
                     aom_highbd_8_masked_sub_pixel_variance4x4)
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_128X32, aom_highbd_masked_sad128x32_bits8,
+                    aom_highbd_8_masked_sub_pixel_variance128x32)
+
+        HIGHBD_MBFP(BLOCK_32X128, aom_highbd_masked_sad32x128_bits8,
+                    aom_highbd_8_masked_sub_pixel_variance32x128)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits8,
                     aom_highbd_8_masked_sub_pixel_variance64x16)
 
@@ -1743,6 +1781,16 @@
                     aom_highbd_obmc_variance4x4,
                     aom_highbd_obmc_sub_pixel_variance4x4)
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_OBFP(BLOCK_128X32, aom_highbd_obmc_sad128x32_bits8,
+                    aom_highbd_obmc_variance128x32,
+                    aom_highbd_obmc_sub_pixel_variance128x32)
+
+        HIGHBD_OBFP(BLOCK_32X128, aom_highbd_obmc_sad32x128_bits8,
+                    aom_highbd_obmc_variance32x128,
+                    aom_highbd_obmc_sub_pixel_variance32x128)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits8,
                     aom_highbd_obmc_variance64x16,
                     aom_highbd_obmc_sub_pixel_variance64x16)
@@ -1772,6 +1820,22 @@
 
       case AOM_BITS_10:
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_BFP(BLOCK_128X32, aom_highbd_sad128x32_bits10,
+                   aom_highbd_sad128x32_avg_bits10,
+                   aom_highbd_10_variance128x32,
+                   aom_highbd_10_sub_pixel_variance128x32,
+                   aom_highbd_10_sub_pixel_avg_variance128x32, NULL, NULL,
+                   aom_highbd_sad128x32x4d_bits10)
+
+        HIGHBD_BFP(BLOCK_32X128, aom_highbd_sad32x128_bits10,
+                   aom_highbd_sad32x128_avg_bits10,
+                   aom_highbd_10_variance32x128,
+                   aom_highbd_10_sub_pixel_variance32x128,
+                   aom_highbd_10_sub_pixel_avg_variance32x128, NULL, NULL,
+                   aom_highbd_sad32x128x4d_bits10)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits10,
                    aom_highbd_sad64x16_avg_bits10, aom_highbd_10_variance64x16,
                    aom_highbd_10_sub_pixel_variance64x16,
@@ -1961,6 +2025,14 @@
         HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10,
                     aom_highbd_10_masked_sub_pixel_variance4x4)
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_128X32, aom_highbd_masked_sad128x32_bits10,
+                    aom_highbd_10_masked_sub_pixel_variance128x32)
+
+        HIGHBD_MBFP(BLOCK_32X128, aom_highbd_masked_sad32x128_bits10,
+                    aom_highbd_10_masked_sub_pixel_variance32x128)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits10,
                     aom_highbd_10_masked_sub_pixel_variance64x16)
 
@@ -2032,6 +2104,16 @@
                     aom_highbd_10_obmc_variance4x4,
                     aom_highbd_10_obmc_sub_pixel_variance4x4)
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_OBFP(BLOCK_128X32, aom_highbd_obmc_sad128x32_bits10,
+                    aom_highbd_10_obmc_variance128x32,
+                    aom_highbd_10_obmc_sub_pixel_variance128x32)
+
+        HIGHBD_OBFP(BLOCK_32X128, aom_highbd_obmc_sad32x128_bits10,
+                    aom_highbd_10_obmc_variance32x128,
+                    aom_highbd_10_obmc_sub_pixel_variance32x128)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits10,
                     aom_highbd_10_obmc_variance64x16,
                     aom_highbd_10_obmc_sub_pixel_variance64x16)
@@ -2061,6 +2143,22 @@
 
       case AOM_BITS_12:
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_BFP(BLOCK_128X32, aom_highbd_sad128x32_bits12,
+                   aom_highbd_sad128x32_avg_bits12,
+                   aom_highbd_12_variance128x32,
+                   aom_highbd_12_sub_pixel_variance128x32,
+                   aom_highbd_12_sub_pixel_avg_variance128x32, NULL, NULL,
+                   aom_highbd_sad128x32x4d_bits12)
+
+        HIGHBD_BFP(BLOCK_32X128, aom_highbd_sad32x128_bits12,
+                   aom_highbd_sad32x128_avg_bits12,
+                   aom_highbd_12_variance32x128,
+                   aom_highbd_12_sub_pixel_variance32x128,
+                   aom_highbd_12_sub_pixel_avg_variance32x128, NULL, NULL,
+                   aom_highbd_sad32x128x4d_bits12)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits12,
                    aom_highbd_sad64x16_avg_bits12, aom_highbd_12_variance64x16,
                    aom_highbd_12_sub_pixel_variance64x16,
@@ -2250,6 +2348,14 @@
         HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12,
                     aom_highbd_12_masked_sub_pixel_variance4x4)
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_MBFP(BLOCK_128X32, aom_highbd_masked_sad128x32_bits12,
+                    aom_highbd_12_masked_sub_pixel_variance128x32)
+
+        HIGHBD_MBFP(BLOCK_32X128, aom_highbd_masked_sad32x128_bits12,
+                    aom_highbd_12_masked_sub_pixel_variance32x128)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits12,
                     aom_highbd_12_masked_sub_pixel_variance64x16)
 
@@ -2322,6 +2428,16 @@
                     aom_highbd_12_obmc_variance4x4,
                     aom_highbd_12_obmc_sub_pixel_variance4x4)
 #if CONFIG_EXT_PARTITION_TYPES
+#if CONFIG_EXT_PARTITION
+        HIGHBD_OBFP(BLOCK_128X32, aom_highbd_obmc_sad128x32_bits12,
+                    aom_highbd_12_obmc_variance128x32,
+                    aom_highbd_12_obmc_sub_pixel_variance128x32)
+
+        HIGHBD_OBFP(BLOCK_32X128, aom_highbd_obmc_sad32x128_bits12,
+                    aom_highbd_12_obmc_variance32x128,
+                    aom_highbd_12_obmc_sub_pixel_variance32x128)
+#endif  // CONFIG_EXT_PARTITION
+
         HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits12,
                     aom_highbd_12_obmc_variance64x16,
                     aom_highbd_12_obmc_sub_pixel_variance64x16)
@@ -2768,7 +2884,17 @@
   BFP(BLOCK_64X16, aom_sad64x16, aom_sad64x16_avg, aom_variance64x16,
       aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16, NULL, NULL,
       aom_sad64x16x4d)
-#endif
+
+#if CONFIG_EXT_PARTITION
+  BFP(BLOCK_32X128, aom_sad32x128, aom_sad32x128_avg, aom_variance32x128,
+      aom_sub_pixel_variance32x128, aom_sub_pixel_avg_variance32x128, NULL,
+      NULL, aom_sad32x128x4d)
+
+  BFP(BLOCK_128X32, aom_sad128x32, aom_sad128x32_avg, aom_variance128x32,
+      aom_sub_pixel_variance128x32, aom_sub_pixel_avg_variance128x32, NULL,
+      NULL, aom_sad128x32x4d)
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_EXT_PARTITION_TYPES
 
 #if CONFIG_EXT_PARTITION
   BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128,
@@ -2901,7 +3027,15 @@
 
   OBFP(BLOCK_64X16, aom_obmc_sad64x16, aom_obmc_variance64x16,
        aom_obmc_sub_pixel_variance64x16)
-#endif
+
+#if CONFIG_EXT_PARTITION
+  OBFP(BLOCK_32X128, aom_obmc_sad32x128, aom_obmc_variance32x128,
+       aom_obmc_sub_pixel_variance32x128)
+
+  OBFP(BLOCK_128X32, aom_obmc_sad128x32, aom_obmc_variance128x32,
+       aom_obmc_sub_pixel_variance128x32)
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_EXT_PARTITION_TYPES
 #endif  // CONFIG_MOTION_VAR
 
 #if CONFIG_EXT_INTER
@@ -2941,7 +3075,13 @@
   MBFP(BLOCK_16X64, aom_masked_sad16x64, aom_masked_sub_pixel_variance16x64)
 
   MBFP(BLOCK_64X16, aom_masked_sad64x16, aom_masked_sub_pixel_variance64x16)
-#endif
+
+#if CONFIG_EXT_PARTITION
+  MBFP(BLOCK_32X128, aom_masked_sad32x128, aom_masked_sub_pixel_variance32x128)
+
+  MBFP(BLOCK_128X32, aom_masked_sad128x32, aom_masked_sub_pixel_variance128x32)
+#endif  // CONFIG_EXT_PARTITION
+#endif  // CONFIG_EXT_PARTITION_TYPES
 #endif  // CONFIG_EXT_INTER
 
 #if CONFIG_HIGHBITDEPTH
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 3fcf10f..8524ca2 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -57,11 +57,14 @@
 #if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8
   2,  2,  2,
 #endif
-  2,  3,  3,  4, 6,  6, 8, 12, 12, 16, 24, 24, 32,
+  2,  3,  3,  4, 6,  6,  8, 12, 12, 16, 24, 24, 32,
 #if CONFIG_EXT_PARTITION
   48, 48, 64,
 #endif  // CONFIG_EXT_PARTITION
-  4,  4,  8,  8, 16, 16
+  4,  4,  8,  8, 16, 16,
+#if CONFIG_EXT_PARTITION
+  32, 32
+#endif  // CONFIG_EXT_PARTITION
 };
 
 #if CONFIG_EXT_TX