Disallow 4:1/1:4 partitions for BLOCK_128X128

The change can be temporarily reverted by setting ALLOW_128X32_BLOCKS
to 1 (for easy "is this better?" testing: we'll get rid of the option
once it's clear what we're doing).

Since this means we have to modify the "num_partition_types"
calculation in functions like read_partition, we move it into a helper
function called partition_cdf_length(). Doing this makes the bodies of
read_partition and write_partition somewhat simpler than they were.

Change-Id: I3573f62f71dc3344aed2dcb3e423e034eb56a7c5
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index a07850a..0322979 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -1767,12 +1767,19 @@
   { AOM_CDF10(7424, 9008, 9528, 30664, 31192, 31720, 31893, 32066, 32594) },
   { AOM_CDF10(1280, 1710, 2069, 31978, 32121, 32264, 32383, 32502, 32647) },
 #if CONFIG_EXT_PARTITION
-  // 128x128 -> 64x64
+#if ALLOW_128X32_BLOCKS
   { AOM_CDF10(28416, 28705, 28926, 32258, 32354, 32450, 32523, 32596, 32693) },
   { AOM_CDF10(9216, 9952, 11849, 30134, 30379, 30624, 31256, 31888, 32134) },
   { AOM_CDF10(7424, 9008, 9528, 30664, 31192, 31720, 31893, 32066, 32594) },
   { AOM_CDF10(1280, 1710, 2069, 31978, 32121, 32264, 32383, 32502, 32647) },
-#endif
+#else
+  // 128x128 -> 64x64
+  { AOM_CDF8(28416, 28705, 28926, 32258, 32402, 32547, 32548) },
+  { AOM_CDF8(9216, 9952, 11849, 30134, 30502, 30870, 30871) },
+  { AOM_CDF8(7424, 9008, 9528, 30664, 31456, 32248, 32249) },
+  { AOM_CDF8(1280, 1710, 2069, 31978, 32193, 32409, 32410) },
+#endif  // ALLOW_128X32_BLOCKS
+#endif  // CONFIG_EXT_PARTITION
 };
 #else
 static const aom_cdf_prob
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 358be3a..ca92222 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -53,6 +53,11 @@
 #define KF_MODE_CONTEXTS 5
 #endif
 
+// Whether 4:1 and 1:4 partitions are allowed for 128x128 blocks (1 = allowed,
+// 0 = disallowed). They seem not to be giving great results (and might be
+// expensive to implement in hardware), so they are disabled by default.
+#define ALLOW_128X32_BLOCKS 0
+
 struct AV1Common;
 
 typedef struct {
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index fdc1bc2..36fa27e 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -988,7 +988,9 @@
 }
 
 static INLINE void partition_gather_horz_alike(aom_cdf_prob *out,
-                                               const aom_cdf_prob *const in) {
+                                               const aom_cdf_prob *const in,
+                                               BLOCK_SIZE bsize) {
+  (void)bsize;
   out[0] = CDF_PROB_TOP;
   out[0] -= cdf_element_prob(in, PARTITION_HORZ);
   out[0] -= cdf_element_prob(in, PARTITION_SPLIT);
@@ -996,14 +998,19 @@
   out[0] -= cdf_element_prob(in, PARTITION_HORZ_A);
   out[0] -= cdf_element_prob(in, PARTITION_HORZ_B);
   out[0] -= cdf_element_prob(in, PARTITION_VERT_A);
-  out[0] -= cdf_element_prob(in, PARTITION_HORZ_4);
-#endif
+#if !ALLOW_128X32_BLOCKS
+  if (bsize != BLOCK_128X128)
+#endif  // !ALLOW_128X32_BLOCKS
+    out[0] -= cdf_element_prob(in, PARTITION_HORZ_4);
+#endif  // CONFIG_EXT_PARTITION_TYPES
   out[0] = AOM_ICDF(out[0]);
   out[1] = AOM_ICDF(CDF_PROB_TOP);
 }
 
 static INLINE void partition_gather_vert_alike(aom_cdf_prob *out,
-                                               const aom_cdf_prob *const in) {
+                                               const aom_cdf_prob *const in,
+                                               BLOCK_SIZE bsize) {
+  (void)bsize;
   out[0] = CDF_PROB_TOP;
   out[0] -= cdf_element_prob(in, PARTITION_VERT);
   out[0] -= cdf_element_prob(in, PARTITION_SPLIT);
@@ -1011,8 +1018,11 @@
   out[0] -= cdf_element_prob(in, PARTITION_HORZ_A);
   out[0] -= cdf_element_prob(in, PARTITION_VERT_A);
   out[0] -= cdf_element_prob(in, PARTITION_VERT_B);
-  out[0] -= cdf_element_prob(in, PARTITION_VERT_4);
-#endif
+#if !ALLOW_128X32_BLOCKS
+  if (bsize != BLOCK_128X128)
+#endif  // !ALLOW_128X32_BLOCKS
+    out[0] -= cdf_element_prob(in, PARTITION_VERT_4);
+#endif  // CONFIG_EXT_PARTITION_TYPES
   out[0] = AOM_ICDF(out[0]);
   out[1] = AOM_ICDF(CDF_PROB_TOP);
 }
@@ -1100,6 +1110,23 @@
   return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
 }
 
+// Return the number of symbols in the partition CDF used when partitioning
+// the (square) block with luma block size bsize. BLOCK_128X128 has two fewer
+// symbols than other extended-partition sizes when its 4:1 and 1:4 partitions
+// are disabled (ALLOW_128X32_BLOCKS == 0).
+static INLINE int partition_cdf_length(BLOCK_SIZE bsize) {
+#if CONFIG_EXT_PARTITION_TYPES
+  if (bsize <= BLOCK_8X8) return PARTITION_TYPES;
+#if CONFIG_EXT_PARTITION && !ALLOW_128X32_BLOCKS
+  if (bsize == BLOCK_128X128) return EXT_PARTITION_TYPES - 2;
+#endif  // CONFIG_EXT_PARTITION && !ALLOW_128X32_BLOCKS
+  return EXT_PARTITION_TYPES;
+#else
+  (void)bsize;
+  return PARTITION_TYPES;
+#endif  // CONFIG_EXT_PARTITION_TYPES
+}
+
 static INLINE int max_block_wide(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                  int plane) {
   int max_blocks_wide = block_size_wide[bsize];
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 427580b..83d3eb1 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -658,46 +658,33 @@
 #endif
 }
 
-static PARTITION_TYPE read_partition(AV1_COMMON *cm, MACROBLOCKD *xd,
-                                     int mi_row, int mi_col, aom_reader *r,
-                                     int has_rows, int has_cols,
+static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                     aom_reader *r, int has_rows, int has_cols,
                                      BLOCK_SIZE bsize) {
   const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-  PARTITION_TYPE p;
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  (void)cm;
 
-  aom_cdf_prob *partition_cdf = (ctx >= 0) ? ec_ctx->partition_cdf[ctx] : NULL;
+  if (!has_rows && !has_cols) return PARTITION_SPLIT;
 
+  assert(ctx >= 0);
+  aom_cdf_prob *partition_cdf = ec_ctx->partition_cdf[ctx];
   if (has_rows && has_cols) {
-#if CONFIG_EXT_PARTITION_TYPES
-    const int num_partition_types =
-        (mi_width_log2_lookup[bsize] > mi_width_log2_lookup[BLOCK_8X8])
-            ? EXT_PARTITION_TYPES
-            : PARTITION_TYPES;
-#else
-    const int num_partition_types = PARTITION_TYPES;
-#endif  // CONFIG_EXT_PARTITION_TYPES
-    p = (PARTITION_TYPE)aom_read_symbol(r, partition_cdf, num_partition_types,
-                                        ACCT_STR);
+    return (PARTITION_TYPE)aom_read_symbol(
+        r, partition_cdf, partition_cdf_length(bsize), ACCT_STR);
   } else if (!has_rows && has_cols) {
     assert(bsize > BLOCK_8X8);
     aom_cdf_prob cdf[2];
-    partition_gather_vert_alike(cdf, partition_cdf);
+    partition_gather_vert_alike(cdf, partition_cdf, bsize);
     assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP));
-    p = aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_HORZ;
-    // gather cols
-  } else if (has_rows && !has_cols) {
+    return aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_HORZ;
+  } else {
+    assert(has_rows && !has_cols);
     assert(bsize > BLOCK_8X8);
     aom_cdf_prob cdf[2];
-    partition_gather_horz_alike(cdf, partition_cdf);
+    partition_gather_horz_alike(cdf, partition_cdf, bsize);
     assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP));
-    p = aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_VERT;
-  } else {
-    p = PARTITION_SPLIT;
+    return aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_VERT;
   }
-
-  return p;
 }
 
 // TODO(slavarnway): eliminate bsize and subsize in future commits
@@ -725,7 +712,7 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
 
   partition = (bsize < BLOCK_8X8) ? PARTITION_NONE
-                                  : read_partition(cm, xd, mi_row, mi_col, r,
+                                  : read_partition(xd, mi_row, mi_col, r,
                                                    has_rows, has_cols, bsize);
   subsize = subsize_lookup[partition][bsize];  // get_subsize(bsize, partition);
 
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index e7c5911..1bacf31 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2043,41 +2043,36 @@
                             const MACROBLOCKD *const xd, int hbs, int mi_row,
                             int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize,
                             aom_writer *w) {
-  const int has_rows = (mi_row + hbs) < cm->mi_rows;
-  const int has_cols = (mi_col + hbs) < cm->mi_cols;
   const int is_partition_point = bsize >= BLOCK_8X8;
-  const int ctx = is_partition_point
-                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
-                      : 0;
-  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  (void)cm;
 
   if (!is_partition_point) return;
 
+  const int has_rows = (mi_row + hbs) < cm->mi_rows;
+  const int has_cols = (mi_col + hbs) < cm->mi_cols;
+  const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
+  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+
+  if (!has_rows && !has_cols) {
+    assert(p == PARTITION_SPLIT);
+    return;
+  }
+
   if (has_rows && has_cols) {
-#if CONFIG_EXT_PARTITION_TYPES
-    const int num_partition_types =
-        (mi_width_log2_lookup[bsize] > mi_width_log2_lookup[BLOCK_8X8])
-            ? EXT_PARTITION_TYPES
-            : PARTITION_TYPES;
-#else
-    const int num_partition_types = PARTITION_TYPES;
-#endif
-    aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], num_partition_types);
+    aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx],
+                     partition_cdf_length(bsize));
   } else if (!has_rows && has_cols) {
     assert(p == PARTITION_SPLIT || p == PARTITION_HORZ);
     assert(bsize > BLOCK_8X8);
     aom_cdf_prob cdf[2];
-    partition_gather_vert_alike(cdf, ec_ctx->partition_cdf[ctx]);
+    partition_gather_vert_alike(cdf, ec_ctx->partition_cdf[ctx], bsize);
     aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2);
-  } else if (has_rows && !has_cols) {
+  } else {
+    assert(has_rows && !has_cols);
     assert(p == PARTITION_SPLIT || p == PARTITION_VERT);
     assert(bsize > BLOCK_8X8);
     aom_cdf_prob cdf[2];
-    partition_gather_horz_alike(cdf, ec_ctx->partition_cdf[ctx]);
+    partition_gather_horz_alike(cdf, ec_ctx->partition_cdf[ctx], bsize);
     aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2);
-  } else {
-    assert(p == PARTITION_SPLIT);
   }
 }
 
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 9ede0bb..b0a59a5 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -2553,13 +2553,13 @@
     if (has_cols) {
       // At the bottom, the two possibilities are HORZ and SPLIT
       aom_cdf_prob bot_cdf[2];
-      partition_gather_vert_alike(bot_cdf, partition_cdf);
+      partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
       static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
       av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
     } else if (has_rows) {
       // At the right, the two possibilities are VERT and SPLIT
       aom_cdf_prob rhs_cdf[2];
-      partition_gather_horz_alike(rhs_cdf, partition_cdf);
+      partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
       static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
       av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
     } else {
@@ -3014,22 +3014,22 @@
   const int ext_partition_allowed =
       do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed;
 
-  // horz4_partition_allowed and vert4_partition_allowed encode the requirement
-  // that we don't choose a block size that wouldn't be allowed by this
-  // subsampling (stored in the xss and yss variables).
-  //
-  // We definitely can't allow (say) a 16x4 block if yss > xss because it would
-  // subsample to 16x2, which doesn't have an enum. Also, there's no BLOCK_8X2
-  // or BLOCK_2X8, so we can't do 4:1 or 1:4 partitions for BLOCK_16X16 if there
-  // is any subsampling.
-  int horz4_partition_allowed = ext_partition_allowed && partition_horz_allowed;
-  int vert4_partition_allowed = ext_partition_allowed && partition_vert_allowed;
+// partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or PARTITION_VERT_4
+// for this block. This is almost the same as ext_partition_allowed, except
+// that we don't allow 128x32 or 32x128 blocks if ALLOW_128X32_BLOCKS is false,
+// so we require that bsize is not BLOCK_128X128.
+#if CONFIG_EXT_PARTITION && !ALLOW_128X32_BLOCKS
+  const int partition4_allowed =
+      ext_partition_allowed && bsize != BLOCK_128X128;
+#else
+  const int partition4_allowed = ext_partition_allowed;
+#endif
 
 #if CONFIG_EXT_PARTITION_TYPES_AB
   // The alternative AB partitions are allowed iff the corresponding 4:1
   // partitions are allowed.
-  int horzab_partition_allowed = horz4_partition_allowed;
-  int vertab_partition_allowed = vert4_partition_allowed;
+  int horzab_partition_allowed = partition4_allowed;
+  int vertab_partition_allowed = partition4_allowed;
 #else
   // The standard AB partitions are allowed whenever ext-partition-types are
   // allowed
@@ -3163,17 +3163,15 @@
 #endif
 
   // PARTITION_HORZ_4
-  // TODO(david.barker): For this and PARTITION_VERT_4,
-  // * Add support for BLOCK_16X16 once we support 2x8 and 8x2 blocks for the
-  //   chroma plane
+  int partition_horz4_allowed = partition4_allowed && partition_horz_allowed;
   if (cpi->sf.prune_ext_partition_types_search) {
-    horz4_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
+    partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
                                 pc_tree->partitioning == PARTITION_HORZ_A ||
                                 pc_tree->partitioning == PARTITION_HORZ_B ||
                                 pc_tree->partitioning == PARTITION_SPLIT ||
                                 pc_tree->partitioning == PARTITION_NONE);
   }
-  if (horz4_partition_allowed && has_rows &&
+  if (partition_horz4_allowed && has_rows &&
       (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
     const int quarter_step = mi_size_high[bsize] / 4;
     PICK_MODE_CONTEXT *ctx_prev = ctx_none;
@@ -3205,15 +3203,17 @@
     }
     restore_context(x, &x_ctx, mi_row, mi_col, bsize);
   }
+
   // PARTITION_VERT_4
+  int partition_vert4_allowed = partition4_allowed && partition_vert_allowed;
   if (cpi->sf.prune_ext_partition_types_search) {
-    vert4_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
+    partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
                                 pc_tree->partitioning == PARTITION_VERT_A ||
                                 pc_tree->partitioning == PARTITION_VERT_B ||
                                 pc_tree->partitioning == PARTITION_SPLIT ||
                                 pc_tree->partitioning == PARTITION_NONE);
   }
-  if (vert4_partition_allowed && has_cols &&
+  if (partition_vert4_allowed && has_cols &&
       (do_rectangular_split || av1_active_v_edge(cpi, mi_row, mi_step))) {
     const int quarter_step = mi_size_wide[bsize] / 4;
     PICK_MODE_CONTEXT *ctx_prev = ctx_none;