Add support for 16x4 and 4x16 partitions

When updating default_partition_cdf, this change sums the probabilities
that were divided evenly across the pairs
PARTITION_HORZ_A/PARTITION_HORZ_B and PARTITION_VERT_A/PARTITION_VERT_B.
Each summed probability is now distributed evenly across the triple
formed by adding PARTITION_HORZ_4 to the first pair and PARTITION_VERT_4
to the second.

Rather than implement 2X8/8X2 blocks for now, ss_size_lookup returns
4X8/8X4 block sizes to use as chroma transform sizes for 4X16/16X4
blocks.

The changes in setup_pred_plane and set_skip_context are needed because
this is presumably the first time we've had to deal with 16x4 or 4x16
blocks. The existing logic only applied the "shuffle back one" offset
when the block size was less than BLOCK_8X8; since BLOCK_16X4 is not
less than BLOCK_8X8, it didn't work. The offset is now applied for
small heights and small widths separately.

Change-Id: If28d8954da42d6c726f2bcce2cb5242154b0870c
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index 825fb1f..ee8d3bf 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -772,10 +772,10 @@
 #endif  // CONFIG_TX64X64
   // TODO(david.barker): Change these if we support rectangular transforms
   // for 4:1 shaped partitions
-  // 4x16,          16x4,              8x32
-  INT32_MIN,        INT32_MIN,         TX_8X8 - TX_8X8,
-  // 32x8,          16x64,             64x16
-  TX_8X8 - TX_8X8,  TX_16X16 - TX_8X8, TX_16X16 - TX_8X8
+  // 4x16,            16x4,               8x32
+  TX_8X8 - TX_8X8,    TX_8X8 - TX_8X8,    TX_8X8 - TX_8X8,
+  // 32x8,            16x64,              64x16
+  TX_8X8 - TX_8X8,    TX_16X16 - TX_8X8,  TX_16X16 - TX_8X8
 };
 #else
 // Same as "max_txsize_lookup[bsize] - TX_8X8", invalid for bsize < 8X8
@@ -809,10 +809,10 @@
   TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,  TX_32X32 - TX_8X8,
 #endif  // CONFIG_EXT_PARTITION
 #endif  // CONFIG_TX64X64
-  // 4x16,            16x4,              8x32
-  INT32_MIN,          INT32_MIN,         TX_8X8 - TX_8X8,
-  // 32x8             16x64,             64x16
-  TX_8X8 - TX_8X8,    TX_16X16 - TX_8X8, TX_16X16 - TX_8X8
+  // 4x16,            16x4,               8x32
+  TX_8X8 - TX_8X8,    TX_8X8 - TX_8X8,    TX_8X8 - TX_8X8,
+  // 32x8,            16x64,              64x16
+  TX_8X8 - TX_8X8,    TX_16X16 - TX_8X8,  TX_16X16 - TX_8X8
 };
 #endif  // CONFIG_RECT_TX && (CONFIG_EXT_TX || CONFIG_VAR_TX)
 
@@ -1115,8 +1115,8 @@
   { { BLOCK_128X64, BLOCK_INVALID }, { BLOCK_64X64, BLOCK_64X32 } },
   { { BLOCK_128X128, BLOCK_128X64 }, { BLOCK_64X128, BLOCK_64X64 } },
 #endif  // CONFIG_EXT_PARTITION
-  { { BLOCK_4X16, BLOCK_4X8 }, { BLOCK_INVALID, BLOCK_INVALID } },
-  { { BLOCK_16X4, BLOCK_INVALID }, { BLOCK_8X4, BLOCK_INVALID } },
+  { { BLOCK_4X16, BLOCK_4X8 }, { BLOCK_INVALID, BLOCK_4X8 } },
+  { { BLOCK_16X4, BLOCK_INVALID }, { BLOCK_8X4, BLOCK_8X4 } },
   { { BLOCK_8X32, BLOCK_8X16 }, { BLOCK_INVALID, BLOCK_4X16 } },
   { { BLOCK_32X8, BLOCK_INVALID }, { BLOCK_16X8, BLOCK_16X4 } },
   { { BLOCK_16X64, BLOCK_16X32 }, { BLOCK_INVALID, BLOCK_8X32 } },
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 6bd452e..aed8f9b 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -2913,17 +2913,17 @@
         0, 0, 0, 0, 0, 0 },
       // 16x16 -> 8x8
       { AOM_ICDF(22272), AOM_ICDF(23768), AOM_ICDF(25043), AOM_ICDF(29996),
-        AOM_ICDF(30744), AOM_ICDF(31493), AOM_ICDF(32130), AOM_ICDF(32768), 0,
-        0, 0 },
+        AOM_ICDF(30495), AOM_ICDF(30994), AOM_ICDF(31419), AOM_ICDF(31844),
+        AOM_ICDF(32343), AOM_ICDF(32768), 0 },
       { AOM_ICDF(11776), AOM_ICDF(13457), AOM_ICDF(16315), AOM_ICDF(28229),
-        AOM_ICDF(29069), AOM_ICDF(29910), AOM_ICDF(31339), AOM_ICDF(32768), 0,
-        0, 0 },
+        AOM_ICDF(28789), AOM_ICDF(29349), AOM_ICDF(30302), AOM_ICDF(31255),
+        AOM_ICDF(31816), AOM_ICDF(32768), 0 },
       { AOM_ICDF(10496), AOM_ICDF(14802), AOM_ICDF(16136), AOM_ICDF(27127),
-        AOM_ICDF(29280), AOM_ICDF(31434), AOM_ICDF(32101), AOM_ICDF(32768), 0,
-        0, 0 },
+        AOM_ICDF(28563), AOM_ICDF(29999), AOM_ICDF(30444), AOM_ICDF(30889),
+        AOM_ICDF(32324), AOM_ICDF(32768), 0 },
       { AOM_ICDF(6784), AOM_ICDF(8763), AOM_ICDF(10440), AOM_ICDF(29110),
-        AOM_ICDF(30100), AOM_ICDF(31090), AOM_ICDF(31929), AOM_ICDF(32768), 0,
-        0, 0 },
+        AOM_ICDF(29770), AOM_ICDF(30430), AOM_ICDF(30989), AOM_ICDF(31548),
+        AOM_ICDF(32208), AOM_ICDF(32768), 0 },
       // 32x32 -> 16x16
       { AOM_ICDF(22656), AOM_ICDF(23801), AOM_ICDF(24702), AOM_ICDF(30721),
         AOM_ICDF(31103), AOM_ICDF(31485), AOM_ICDF(31785), AOM_ICDF(32085),
@@ -4887,10 +4887,10 @@
 
 #if CONFIG_EXT_PARTITION_TYPES
 int av1_num_partition_types[PARTITION_BLOCK_SIZES] = {
-  PARTITION_TYPES,          // 8x8: The 4 traditional partitions
-  EXT_PARTITION_TYPES - 2,  // 16x16: All but 4:1 and 1:4 partitions
-  EXT_PARTITION_TYPES,      // 32x32: All partitions
-  EXT_PARTITION_TYPES,      // 64x64: All partitions
+  PARTITION_TYPES,      // 8x8: The 4 traditional partitions
+  EXT_PARTITION_TYPES,  // 16x16: All partitions
+  EXT_PARTITION_TYPES,  // 32x32: All partitions
+  EXT_PARTITION_TYPES,  // 64x64: All partitions
 #if CONFIG_EXT_PARTITION
   EXT_PARTITION_TYPES - 2  // 128x128: All but 4:1 and 1:4 partitions
 #endif
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index a6c746e..a5006f0 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -667,11 +667,12 @@
   for (i = 0; i < MAX_MB_PLANE; ++i) {
     struct macroblockd_plane *const pd = &xd->plane[i];
 #if CONFIG_CHROMA_SUB8X8
-    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
-      // Offset the buffer pointer
-      if (pd->subsampling_y && (mi_row & 0x01)) row_offset = mi_row - 1;
-      if (pd->subsampling_x && (mi_col & 0x01)) col_offset = mi_col - 1;
-    }
+    // Offset the buffer pointer
+    const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
+    if (pd->subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
+      row_offset = mi_row - 1;
+    if (pd->subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
+      col_offset = mi_col - 1;
 #endif
     int above_idx = col_offset << (MI_SIZE_LOG2 - tx_size_wide_log2[0]);
     int left_idx = (row_offset & MAX_MIB_MASK)
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 0280ce2..59f0327 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -493,11 +493,11 @@
                                     const struct scale_factors *scale,
                                     int subsampling_x, int subsampling_y) {
 #if CONFIG_CHROMA_SUB8X8
-  if (bsize < BLOCK_8X8) {
-    // Offset the buffer pointer
-    if (subsampling_y && (mi_row & 0x01)) mi_row -= 1;
-    if (subsampling_x && (mi_col & 0x01)) mi_col -= 1;
-  }
+  // Offset the buffer pointer
+  if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
+    mi_row -= 1;
+  if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
+    mi_col -= 1;
 #else
   (void)bsize;
 #endif
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 2ff56a6..ef651b0 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4300,7 +4300,7 @@
   // * Add support for BLOCK_16X16 once we support 2x8 and 8x2 blocks for the
   //   chroma plane
   // * Add support for supertx
-  if ((bsize == BLOCK_32X32 || bsize == BLOCK_64X64) &&
+  if ((bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16) &&
       partition_horz_allowed && !force_horz_split &&
       (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) {
     int i;
@@ -4356,7 +4356,7 @@
 #endif
   }
   // PARTITION_VERT_4
-  if ((bsize == BLOCK_32X32 || bsize == BLOCK_64X64) &&
+  if ((bsize == BLOCK_64X64 || bsize == BLOCK_32X32 || bsize == BLOCK_16X16) &&
       partition_vert_allowed && !force_vert_split &&
       (do_rectangular_split || av1_active_v_edge(cpi, mi_row, mi_step))) {
     int i;