Make superblock size variable at the frame level.

The uncompressed frame header contains a bit to signal whether the
frame is encoded using 64x64 or 128x128 superblocks. This can vary
between any 2 frames.

vpxenc gained the --sb-size={64,128,dynamic} option, which allows the
configuration of the superblock size used (default is dynamic). 64/128
will force the encoder to always use the specified superblock size.
Dynamic would enable the encoder to choose the sb size for each
frame, but this is not implemented yet (dynamic does the same as 128
for now).

Constraints on tile sizes depend on the superblock size. The following
is a summary of the current bitstream syntax and semantics:

If both --enable-ext-tile is OFF and --enable-ext-partition is OFF:
     The tile coding in this case is the same as VP9. In particular,
     tiles have a minimum width of 256 pixels and a maximum width of
     4096 pixels. The tile width must be multiples of 64 pixels
     (except for the rightmost tile column). There can be a maximum
     of 64 tile columns and 4 tile rows.

If --enable-ext-tile is OFF and --enable-ext-partition is ON:
     Same constraints as above, except that tile width must be
     multiples of 128 pixels (except for the rightmost tile column).

There is no change in the bitstream syntax used for coding the tile
configuration if --enable-ext-tile is OFF.

If --enable-ext-tile is ON and --enable-ext-partition is OFF:
     This is the new large scale tile coding configuration. The
     minimum/maximum tile width and height are 64/4096 pixels. Tile
     width and height must be multiples of 64 pixels. The uncompressed
     header contains two 6-bit fields that hold the tile width/height
     in units of 64 pixels. The maximum number of tile rows/columns
     is only limited by the maximum frame size of 65536x65536 pixels
     that can be coded in the bitstream. This yields a maximum of
     1024x1024 tile rows and columns (of 64x64 tiles in a 65536x65536
     frame).

If both --enable-ext-tile is ON and --enable-ext-partition is ON:
     The same applies as above, except that in the bitstream the 2 fields
     containing the tile width/height are in units of the superblock
     size, and the superblock size itself is also coded in the bitstream.
     If the uncompressed header signals the use of 64x64 superblocks,
     then the tile width/height fields are 6 bits wide and are in units
     of 64 pixels. If the uncompressed header signals the use of 128x128
     superblocks, then the tile width/height fields are 5 bits wide and
     are in units of 128 pixels.

The above is a summary of the bitstream. The user interface to vpxenc
(and the equivalent encoder API) behaves as follows:

If --enable-ext-tile is OFF:
     No change in the user interface. --tile-columns and --tile-rows
     specify the base 2 logarithm of the desired number of tile columns
     and tile rows. The actual number of tile rows and tile columns,
     and the particular tile width and tile height are computed by the
     codec ensuring all of the above constraints are respected.

If --enable-ext-tile is ON, but --enable-ext-partition is OFF:
     No change in the user interface. --tile-columns and --tile-rows
     specify the WIDTH and HEIGHT of the tiles in units of 64 pixels.
     The valid values are in the range [1, 64] (which corresponds to
     [64, 4096] pixels in increments of 64).

If both --enable-ext-tile is ON and --enable-ext-partition is ON:
     If --sb-size=64:
         The user interface is the same as in the previous point.
         --tile-columns and --tile-rows specify tile WIDTH and HEIGHT,
         in units of 64 pixels, in the range [1, 64] (which corresponds
         to [64, 4096] pixels in increments of 64).
     If --sb-size=128 or --sb-size=dynamic (default):
         --tile-columns and --tile-rows specify tile WIDTH and HEIGHT,
         in units of 128 pixels in the range [1, 32] (which corresponds
         to [128, 4096] pixels in increments of 128).

Change-Id: Idc9beee1ad12ff1634e83671985d14c680f9179a
diff --git a/vp10/common/alloccommon.c b/vp10/common/alloccommon.c
index dd58e6d..abdc72b 100644
--- a/vp10/common/alloccommon.c
+++ b/vp10/common/alloccommon.c
@@ -134,7 +134,8 @@
     // TODO(geza.lore): These are bigger than they need to be.
     // cm->tile_width would be enough but it complicates indexing a
     // little elsewhere.
-    const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+    const int aligned_mi_cols =
+        ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
     int i;
 
     for (i = 0 ; i < MAX_MB_PLANE ; i++) {
diff --git a/vp10/common/enums.h b/vp10/common/enums.h
index 645ae86..e144a45 100644
--- a/vp10/common/enums.h
+++ b/vp10/common/enums.h
@@ -41,6 +41,7 @@
 #define MAX_MIB_MASK    (MAX_MIB_SIZE - 1)
 #define MAX_MIB_MASK_2  (MAX_MIB_SIZE * 2 - 1)
 
+// Maximum number of tile rows and tile columns
 #if CONFIG_EXT_TILE
 # define  MAX_TILE_ROWS 1024
 # define  MAX_TILE_COLS 1024
diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c
index 8e75522..23c131d 100644
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -863,10 +863,8 @@
   const int shift_32_uv[] = {0, 2, 8, 10};
   const int shift_16_uv[] = {0, 1, 4, 5};
   int i;
-  const int max_rows = (mi_row + MAX_MIB_SIZE > cm->mi_rows ?
-                        cm->mi_rows - mi_row : MAX_MIB_SIZE);
-  const int max_cols = (mi_col + MAX_MIB_SIZE > cm->mi_cols ?
-                        cm->mi_cols - mi_col : MAX_MIB_SIZE);
+  const int max_rows = VPXMIN(cm->mi_rows - mi_row, MAX_MIB_SIZE);
+  const int max_cols = VPXMIN(cm->mi_cols - mi_col, MAX_MIB_SIZE);
 #if CONFIG_EXT_PARTITION
   assert(0 && "Not yet updated");
 #endif  // CONFIG_EXT_PARTITION
@@ -1206,13 +1204,12 @@
 
 void vp10_filter_block_plane_non420(VP10_COMMON *cm,
                                     struct macroblockd_plane *plane,
-                                    MODE_INFO **mi_8x8,
+                                    MODE_INFO **mib,
                                     int mi_row, int mi_col) {
   const int ss_x = plane->subsampling_x;
   const int ss_y = plane->subsampling_y;
   const int row_step = 1 << ss_y;
   const int col_step = 1 << ss_x;
-  const int row_step_stride = cm->mi_stride * row_step;
   struct buf_2d *const dst = &plane->dst;
   uint8_t* const dst0 = dst->buf;
   unsigned int mask_16x16[MAX_MIB_SIZE] = {0};
@@ -1222,15 +1219,15 @@
   uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE];
   int r, c;
 
-  for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
     unsigned int mask_16x16_c = 0;
     unsigned int mask_8x8_c = 0;
     unsigned int mask_4x4_c = 0;
     unsigned int border_mask;
 
     // Determine the vertical edges that need filtering
-    for (c = 0; c < MAX_MIB_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
-      const MODE_INFO *mi = mi_8x8[c];
+    for (c = 0; c < cm->mib_size && mi_col + c < cm->mi_cols; c += col_step) {
+      const MODE_INFO *mi = mib[c];
       const MB_MODE_INFO *mbmi = &mi[0].mbmi;
       const BLOCK_SIZE sb_type = mbmi->sb_type;
       const int skip_this = mbmi->skip && is_inter_block(mbmi);
@@ -1378,13 +1375,13 @@
                             mask_4x4_int[r],
                             &cm->lf_info, &lfl[r][0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-    dst->buf += 8 * dst->stride;
-    mi_8x8 += row_step_stride;
+    dst->buf += MI_SIZE * dst->stride;
+    mib += row_step * cm->mi_stride;
   }
 
   // Now do horizontal pass
   dst->buf = dst0;
-  for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
     const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
     const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
 
@@ -1428,7 +1425,7 @@
                              mask_4x4_int_r,
                              &cm->lf_info, &lfl[r][0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-    dst->buf += 8 * dst->stride;
+    dst->buf += MI_SIZE * dst->stride;
   }
 }
 
@@ -1447,7 +1444,7 @@
   assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
 
   // Vertical pass: do 2 rows at one time
-  for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 2) {
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
     unsigned int mask_16x16_l = mask_16x16 & 0xffff;
     unsigned int mask_8x8_l = mask_8x8 & 0xffff;
     unsigned int mask_4x4_l = mask_4x4 & 0xffff;
@@ -1472,11 +1469,11 @@
         mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
         &lfm->lfl_y[r][0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-    dst->buf += 16 * dst->stride;
-    mask_16x16 >>= 16;
-    mask_8x8 >>= 16;
-    mask_4x4 >>= 16;
-    mask_4x4_int >>= 16;
+    dst->buf += 2 * MI_SIZE * dst->stride;
+    mask_16x16 >>= 2 * MI_SIZE;
+    mask_8x8 >>= 2 * MI_SIZE;
+    mask_4x4 >>= 2 * MI_SIZE;
+    mask_4x4_int >>= 2 * MI_SIZE;
   }
 
   // Horizontal pass
@@ -1486,7 +1483,7 @@
   mask_4x4 = lfm->above_y[TX_4X4];
   mask_4x4_int = lfm->int_4x4_y;
 
-  for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r++) {
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
     unsigned int mask_16x16_r;
     unsigned int mask_8x8_r;
     unsigned int mask_4x4_r;
@@ -1519,11 +1516,11 @@
                              &lfm->lfl_y[r][0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-    dst->buf += 8 * dst->stride;
-    mask_16x16 >>= 8;
-    mask_8x8 >>= 8;
-    mask_4x4 >>= 8;
-    mask_4x4_int >>= 8;
+    dst->buf += MI_SIZE * dst->stride;
+    mask_16x16 >>= MI_SIZE;
+    mask_8x8 >>= MI_SIZE;
+    mask_4x4 >>= MI_SIZE;
+    mask_4x4_int >>= MI_SIZE;
   }
 }
 
@@ -1541,14 +1538,13 @@
   uint16_t mask_4x4_int = lfm->left_int_4x4_uv;
 
   assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
+  assert(plane->plane_type == PLANE_TYPE_UV);
 
   // Vertical pass: do 2 rows at one time
-  for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 4) {
-    if (plane->plane_type == 1) {
-      for (c = 0; c < (MAX_MIB_SIZE >> 1); c++) {
-        lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
-        lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
-      }
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 4) {
+    for (c = 0; c < (cm->mib_size >> 1); c++) {
+      lfm->lfl_uv[r >> 1][c] = lfm->lfl_y[r][c << 1];
+      lfm->lfl_uv[(r + 2) >> 1][c] = lfm->lfl_y[r + 2][c << 1];
     }
 
     {
@@ -1577,11 +1573,11 @@
           &lfm->lfl_uv[r >> 1][0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-      dst->buf += 16 * dst->stride;
-      mask_16x16 >>= 8;
-      mask_8x8 >>= 8;
-      mask_4x4 >>= 8;
-      mask_4x4_int >>= 8;
+      dst->buf += 2 * MI_SIZE * dst->stride;
+      mask_16x16 >>= MI_SIZE;
+      mask_8x8 >>= MI_SIZE;
+      mask_4x4 >>= MI_SIZE;
+      mask_4x4_int >>= MI_SIZE;
     }
   }
 
@@ -1592,7 +1588,7 @@
   mask_4x4 = lfm->above_uv[TX_4X4];
   mask_4x4_int = lfm->above_int_4x4_uv;
 
-  for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 2) {
+  for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
     const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
     const unsigned int mask_4x4_int_r =
         skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
@@ -1628,11 +1624,11 @@
                              &lfm->lfl_uv[r >> 1][0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-    dst->buf += 8 * dst->stride;
-    mask_16x16 >>= 4;
-    mask_8x8 >>= 4;
-    mask_4x4 >>= 4;
-    mask_4x4_int >>= 4;
+    dst->buf += MI_SIZE * dst->stride;
+    mask_16x16 >>= MI_SIZE / 2;
+    mask_8x8 >>= MI_SIZE / 2;
+    mask_4x4 >>= MI_SIZE / 2;
+    mask_4x4_int >>= MI_SIZE / 2;
   }
 }
 
@@ -1647,12 +1643,12 @@
 # if CONFIG_VAR_TX
   memset(cm->above_txfm_context, TX_SIZES, cm->mi_cols);
 # endif  // CONFIG_VAR_TX
-  for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
+  for (mi_row = start; mi_row < stop; mi_row += cm->mib_size) {
     MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
 # if CONFIG_VAR_TX
     memset(cm->left_txfm_context, TX_SIZES, MAX_MIB_SIZE);
 # endif  // CONFIG_VAR_TX
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
       int plane;
 
       vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h
index d122495..3ac17e2 100644
--- a/vp10/common/onyxc_int.h
+++ b/vp10/common/onyxc_int.h
@@ -312,7 +312,7 @@
   int log2_tile_cols, log2_tile_rows;
 #endif  // !CONFIG_EXT_TILE
   int tile_cols, tile_rows;
-  int tile_width, tile_height;
+  int tile_width, tile_height;  // In MI units
 
   int byte_alignment;
   int skip_loop_filter;
@@ -343,6 +343,10 @@
 #if CONFIG_ANS
   rans_dec_lut token_tab[COEFF_PROB_MODELS];
 #endif  // CONFIG_ANS
+
+  BLOCK_SIZE sb_size;   // Size of the superblock used for this frame
+  int mib_size;         // Size of the superblock in units of MI blocks
+  int mib_size_log2;    // Log 2 of above.
 } VP10_COMMON;
 
 // TODO(hkuang): Don't need to lock the whole pool after implementing atomic
@@ -408,8 +412,12 @@
   bufs[new_idx].ref_count++;
 }
 
-static INLINE int mi_cols_aligned_to_sb(int n_mis) {
-  return ALIGN_POWER_OF_TWO(n_mis, MAX_MIB_SIZE_LOG2);
+static INLINE int mi_cols_aligned_to_sb(const VP10_COMMON *cm) {
+  return ALIGN_POWER_OF_TWO(cm->mi_cols, cm->mib_size_log2);
+}
+
+static INLINE int mi_rows_aligned_to_sb(const VP10_COMMON *cm) {
+  return ALIGN_POWER_OF_TWO(cm->mi_rows, cm->mib_size_log2);
 }
 
 static INLINE int frame_is_intra_only(const VP10_COMMON *const cm) {
@@ -697,6 +705,13 @@
   }
 }
 
+static INLINE void set_sb_size(VP10_COMMON *const cm,
+                               const BLOCK_SIZE sb_size) {
+  cm->sb_size = sb_size;
+  cm->mib_size = num_8x8_blocks_wide_lookup[cm->sb_size];
+  cm->mib_size_log2 = mi_width_log2_lookup[cm->sb_size];
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index e685745..a94aafd 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -94,7 +94,7 @@
                              int start, int stop, int y_only,
                              VP9LfSync *const lf_sync) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
-  const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MAX_MIB_SIZE_LOG2;
+  const int sb_cols = mi_cols_aligned_to_sb(cm) >> cm->mib_size_log2;
   int mi_row, mi_col;
 #if !CONFIG_EXT_PARTITION_TYPES
   enum lf_path path;
@@ -116,12 +116,12 @@
 #endif  // CONFIG_EXT_PARTITION
 
   for (mi_row = start; mi_row < stop;
-       mi_row += lf_sync->num_workers * MAX_MIB_SIZE) {
+       mi_row += lf_sync->num_workers * cm->mib_size) {
     MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
 
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
-      const int r = mi_row >> MAX_MIB_SIZE_LOG2;
-      const int c = mi_col >> MAX_MIB_SIZE_LOG2;
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) {
+      const int r = mi_row >> cm->mib_size_log2;
+      const int c = mi_col >> cm->mib_size_log2;
       int plane;
 
       sync_read(lf_sync, r, c);
@@ -175,7 +175,7 @@
                                 VP9LfSync *lf_sync) {
   const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
   // Number of superblock rows and cols
-  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MAX_MIB_SIZE_LOG2;
+  const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
   // Decoder may allocate more threads than number of tiles based on user's
   // input.
   const int tile_cols = cm->tile_cols;
@@ -215,7 +215,7 @@
 
     // Loopfilter data
     vp10_loop_filter_data_reset(lf_data, frame, cm, planes);
-    lf_data->start = start + i * MAX_MIB_SIZE;
+    lf_data->start = start + i * cm->mib_size;
     lf_data->stop = stop;
     lf_data->y_only = y_only;
 
diff --git a/vp10/common/tile_common.c b/vp10/common/tile_common.c
index 36ec5d3..04b19eb 100644
--- a/vp10/common/tile_common.c
+++ b/vp10/common/tile_common.c
@@ -12,9 +12,6 @@
 #include "vp10/common/onyxc_int.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 
-#define MIN_TILE_WIDTH_B64 4
-#define MAX_TILE_WIDTH_B64 64
-
 void vp10_tile_set_row(TileInfo *tile, const VP10_COMMON *cm, int row) {
   tile->mi_row_start = row * cm->tile_height;
   tile->mi_row_end   = VPXMIN(tile->mi_row_start + cm->tile_height,
@@ -33,26 +30,35 @@
 }
 
 #if !CONFIG_EXT_TILE
-// TODO(geza.lore): CU_SIZE dependent.
-static int get_min_log2_tile_cols(const int sb64_cols) {
+
+# if CONFIG_EXT_PARTITION
+#   define MIN_TILE_WIDTH_MAX_SB 2
+#   define MAX_TILE_WIDTH_MAX_SB 32
+# else
+#   define MIN_TILE_WIDTH_MAX_SB 4
+#   define MAX_TILE_WIDTH_MAX_SB 64
+# endif  // CONFIG_EXT_PARTITION
+
+static int get_min_log2_tile_cols(const int max_sb_cols) {
   int min_log2 = 0;
-  while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols)
+  while ((MAX_TILE_WIDTH_MAX_SB << min_log2) < max_sb_cols)
     ++min_log2;
   return min_log2;
 }
 
-static int get_max_log2_tile_cols(const int sb64_cols) {
+static int get_max_log2_tile_cols(const int max_sb_cols) {
   int max_log2 = 1;
-  while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64)
+  while ((max_sb_cols >> max_log2) >= MIN_TILE_WIDTH_MAX_SB)
     ++max_log2;
   return max_log2 - 1;
 }
 
-void vp10_get_tile_n_bits(int mi_cols,
+void vp10_get_tile_n_bits(const int mi_cols,
                           int *min_log2_tile_cols, int *max_log2_tile_cols) {
-  const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MAX_MIB_SIZE_LOG2;
-  *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols);
-  *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols);
+  const int max_sb_cols =
+      ALIGN_POWER_OF_TWO(mi_cols, MAX_MIB_SIZE_LOG2) >> MAX_MIB_SIZE_LOG2;
+  *min_log2_tile_cols = get_min_log2_tile_cols(max_sb_cols);
+  *max_log2_tile_cols = get_max_log2_tile_cols(max_sb_cols);
   assert(*min_log2_tile_cols <= *max_log2_tile_cols);
 }
 #endif  // !CONFIG_EXT_TILE
diff --git a/vp10/common/tile_common.h b/vp10/common/tile_common.h
index 09cf060..2babc89 100644
--- a/vp10/common/tile_common.h
+++ b/vp10/common/tile_common.h
@@ -30,8 +30,8 @@
 void vp10_tile_set_row(TileInfo *tile, const struct VP10Common *cm, int row);
 void vp10_tile_set_col(TileInfo *tile, const struct VP10Common *cm, int col);
 
-void vp10_get_tile_n_bits(int mi_cols,
-                         int *min_log2_tile_cols, int *max_log2_tile_cols);
+void vp10_get_tile_n_bits(const int mi_cols,
+                          int *min_log2_tile_cols, int *max_log2_tile_cols);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 8cc6b84..af57ac6 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -2817,16 +2817,24 @@
   pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
 }
 
-static void setup_tile_info(VP10Decoder *const pbi,
+static void read_tile_info(VP10Decoder *const pbi,
                             struct vpx_read_bit_buffer *const rb) {
   VP10_COMMON *const cm = &pbi->common;
 #if CONFIG_EXT_TILE
   // Read the tile width/height
-  cm->tile_width  = vpx_rb_read_literal(rb, 6) + 1;   // in [1, 64]
-  cm->tile_height = vpx_rb_read_literal(rb, 6) + 1;   // in [1, 64]
+#if CONFIG_EXT_PARTITION
+  if (cm->sb_size == BLOCK_128X128) {
+    cm->tile_width  = vpx_rb_read_literal(rb, 5) + 1;
+    cm->tile_height = vpx_rb_read_literal(rb, 5) + 1;
+  } else
+#endif  // CONFIG_EXT_PARTITION
+  {
+    cm->tile_width  = vpx_rb_read_literal(rb, 6) + 1;
+    cm->tile_height = vpx_rb_read_literal(rb, 6) + 1;
+  }
 
-  cm->tile_width  = cm->tile_width << MAX_MIB_SIZE_LOG2;
-  cm->tile_height = cm->tile_height << MAX_MIB_SIZE_LOG2;
+  cm->tile_width  <<= cm->mib_size_log2;
+  cm->tile_height <<= cm->mib_size_log2;
 
   cm->tile_width  = VPXMIN(cm->tile_width, cm->mi_cols);
   cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows);
@@ -2867,12 +2875,14 @@
   cm->tile_cols = 1 << cm->log2_tile_cols;
   cm->tile_rows = 1 << cm->log2_tile_rows;
 
-  cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols);
-  cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows);
+  cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+  cm->tile_width >>= cm->log2_tile_cols;
+  cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+  cm->tile_height >>= cm->log2_tile_rows;
 
-  // round to integer multiples of 8
-  cm->tile_width  = mi_cols_aligned_to_sb(cm->tile_width);
-  cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height);
+  // round to integer multiples of superblock size
+  cm->tile_width  = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
+  cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
 
   // tile size magnitude
   if (cm->tile_rows > 1 || cm->tile_cols > 1) {
@@ -3107,8 +3117,7 @@
   int tile_row, tile_col;
 
 #if CONFIG_ENTROPY
-  cm->do_subframe_update =
-      cm->log2_tile_cols == 0 && cm->log2_tile_rows == 0;
+  cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1;
 #endif  // CONFIG_ENTROPY
 
   if (cm->lf.filter_level && !cm->skip_loop_filter &&
@@ -3192,19 +3201,19 @@
       vp10_zero_above_context(cm, tile_info.mi_col_start, tile_info.mi_col_end);
 
       for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
-           mi_row += MAX_MIB_SIZE) {
+           mi_row += cm->mib_size) {
         int mi_col;
 
         vp10_zero_left_context(&td->xd);
 
         for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
-             mi_col += MAX_MIB_SIZE) {
+             mi_col += cm->mib_size) {
           decode_partition(pbi, &td->xd,
 #if CONFIG_SUPERTX
                            0,
 #endif  // CONFIG_SUPERTX
                            mi_row, mi_col, &td->bit_reader,
-                           BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2);
+                           cm->sb_size, b_width_log2_lookup[cm->sb_size]);
         }
         pbi->mb.corrupted |= td->xd.corrupted;
         if (pbi->mb.corrupted)
@@ -3231,8 +3240,8 @@
     // Loopfilter one tile row.
     if (cm->lf.filter_level && !cm->skip_loop_filter) {
       LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
-      const int lf_start = VPXMAX(0, tile_info.mi_row_start - MAX_MIB_SIZE);
-      const int lf_end = tile_info.mi_row_end - MAX_MIB_SIZE;
+      const int lf_start = VPXMAX(0, tile_info.mi_row_start - cm->mib_size);
+      const int lf_end = tile_info.mi_row_end - cm->mib_size;
 
       // Delay the loopfilter if the first tile row is only
       // a single superblock high.
@@ -3256,7 +3265,7 @@
     // After loopfiltering, the last 7 row pixels in each superblock row may
     // still be changed by the longest loopfilter of the next superblock row.
     if (cm->frame_parallel_decode)
-      vp10_frameworker_broadcast(pbi->cur_buf, mi_row << MAX_MIB_SIZE_LOG2);
+      vp10_frameworker_broadcast(pbi->cur_buf, mi_row << cm->mib_size_log2);
 #endif  // !CONFIG_VAR_TX
   }
 
@@ -3292,6 +3301,7 @@
 static int tile_worker_hook(TileWorkerData *const tile_data,
                             const TileInfo *const tile) {
   VP10Decoder *const pbi = tile_data->pbi;
+  const VP10_COMMON *const cm = &pbi->common;
   int mi_row, mi_col;
 
   if (setjmp(tile_data->error_info.jmp)) {
@@ -3306,17 +3316,17 @@
   vp10_zero_above_context(&pbi->common, tile->mi_col_start, tile->mi_col_end);
 
   for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
-       mi_row += MAX_MIB_SIZE) {
+       mi_row += cm->mib_size) {
     vp10_zero_left_context(&tile_data->xd);
 
     for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
-         mi_col += MAX_MIB_SIZE) {
+         mi_col += cm->mib_size) {
       decode_partition(pbi, &tile_data->xd,
 #if CONFIG_SUPERTX
                        0,
 #endif
                        mi_row, mi_col, &tile_data->bit_reader,
-                       BLOCK_LARGEST, MAX_SB_SIZE_LOG2 - 2);
+                       cm->sb_size, b_width_log2_lookup[cm->sb_size]);
     }
   }
   return !tile_data->xd.corrupted;
@@ -3769,6 +3779,12 @@
   if (frame_is_intra_only(cm) || cm->error_resilient_mode)
     vp10_setup_past_independence(cm);
 
+#if CONFIG_EXT_PARTITION
+  set_sb_size(cm, vpx_rb_read_bit(rb) ? BLOCK_128X128 : BLOCK_64X64);
+#else
+  set_sb_size(cm, BLOCK_64X64);
+#endif  // CONFIG_EXT_PARTITION
+
   setup_loopfilter(cm, rb);
 #if CONFIG_LOOP_RESTORATION
   setup_restoration(cm, rb);
@@ -3808,7 +3824,7 @@
                                                       : read_tx_mode(rb);
   cm->reference_mode = read_frame_reference_mode(cm, rb);
 
-  setup_tile_info(pbi, rb);
+  read_tile_info(pbi, rb);
   sz = vpx_rb_read_literal(rb, 16);
 
   if (sz == 0)
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index 5b0633d..b52696d 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -1170,7 +1170,7 @@
 static void fpm_sync(void *const data, int mi_row) {
   VP10Decoder *const pbi = (VP10Decoder *)data;
   vp10_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
-                       mi_row << MAX_MIB_SIZE_LOG2);
+                       mi_row << pbi->common.mib_size_log2);
 }
 
 static void read_inter_block_mode_info(VP10Decoder *const pbi,
diff --git a/vp10/encoder/aq_complexity.c b/vp10/encoder/aq_complexity.c
index 9f73ecc..a4c38d1 100644
--- a/vp10/encoder/aq_complexity.c
+++ b/vp10/encoder/aq_complexity.c
@@ -116,8 +116,6 @@
   VP10_COMMON *const cm = &cpi->common;
 
   const int mi_offset = mi_row * cm->mi_cols + mi_col;
-  const int bw = num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
-  const int bh = num_8x8_blocks_high_lookup[BLOCK_LARGEST];
   const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
   const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
   int x, y;
@@ -130,7 +128,7 @@
     // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
     // It is converted to bits * 256 units.
     const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
-                            (bw * bh);
+                            (cm->mib_size * cm->mib_size);
     double logvar;
     double low_var_thresh;
     const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);
diff --git a/vp10/encoder/aq_cyclicrefresh.c b/vp10/encoder/aq_cyclicrefresh.c
index dd98f3a..057c057 100644
--- a/vp10/encoder/aq_cyclicrefresh.c
+++ b/vp10/encoder/aq_cyclicrefresh.c
@@ -388,8 +388,8 @@
   int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
   int xmis, ymis, x, y;
   memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols);
-  sb_cols = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
-  sb_rows = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
+  sb_cols = (cm->mi_cols + cm->mib_size - 1) / cm->mib_size;
+  sb_rows = (cm->mi_rows + cm->mib_size - 1) / cm->mib_size;
   sbs_in_frame = sb_cols * sb_rows;
   // Number of target blocks to get the q delta (segment 1).
   block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
@@ -404,8 +404,8 @@
     // Get the mi_row/mi_col corresponding to superblock index i.
     int sb_row_index = (i / sb_cols);
     int sb_col_index = i - sb_row_index * sb_cols;
-    int mi_row = sb_row_index * MAX_MIB_SIZE;
-    int mi_col = sb_col_index * MAX_MIB_SIZE;
+    int mi_row = sb_row_index * cm->mib_size;
+    int mi_col = sb_col_index * cm->mib_size;
     int qindex_thresh =
         cpi->oxcf.content == VP9E_CONTENT_SCREEN
             ? vp10_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
@@ -413,11 +413,9 @@
     assert(mi_row >= 0 && mi_row < cm->mi_rows);
     assert(mi_col >= 0 && mi_col < cm->mi_cols);
     bl_index = mi_row * cm->mi_cols + mi_col;
-    // Loop through all 8x8 blocks in superblock and update map.
-    xmis =
-        VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_LARGEST]);
-    ymis =
-        VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_LARGEST]);
+    // Loop through all MI blocks in superblock and update map.
+    xmis = VPXMIN(cm->mi_cols - mi_col, cm->mib_size);
+    ymis = VPXMIN(cm->mi_rows - mi_row, cm->mib_size);
     for (y = 0; y < ymis; y++) {
       for (x = 0; x < xmis; x++) {
         const int bl_index2 = bl_index + y * cm->mi_cols + x;
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index f402acb..da1885d 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -1498,6 +1498,7 @@
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   MODE_INFO *m;
   int plane;
+  int bh, bw;
 #if CONFIG_ANS
   (void) tok;
   (void) tok_end;
@@ -1507,12 +1508,14 @@
   xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
   m = xd->mi[0];
 
+  assert(m->mbmi.sb_type <= cm->sb_size);
+
+  bh = num_8x8_blocks_high_lookup[m->mbmi.sb_type];
+  bw = num_8x8_blocks_wide_lookup[m->mbmi.sb_type];
+
   cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
 
-  set_mi_row_col(xd, tile,
-                 mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type],
-                 mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type],
-                 cm->mi_rows, cm->mi_cols);
+  set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
   if (frame_is_intra_only(cm)) {
     write_mb_modes_kf(cm, xd, xd->mi, w);
   } else {
@@ -1660,7 +1663,7 @@
   const BLOCK_SIZE subsize =  get_subsize(bsize, partition);
 #if CONFIG_SUPERTX
   const int mi_offset = mi_row * cm->mi_stride + mi_col;
-  MB_MODE_INFO *mbmi = NULL;
+  MB_MODE_INFO *mbmi;
   const int pack_token = !supertx_enabled;
   TX_SIZE supertx_size;
   int plane;
@@ -1835,12 +1838,12 @@
 
   vp10_zero_above_context(cm, mi_col_start, mi_col_end);
 
-  for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MAX_MIB_SIZE) {
+  for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += cm->mib_size) {
     vp10_zero_left_context(xd);
 
-    for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MAX_MIB_SIZE) {
+    for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += cm->mib_size) {
       write_modes_sb_wrapper(cpi, tile, w, tok, tok_end, 0,
-                             mi_row, mi_col, BLOCK_LARGEST);
+                             mi_row, mi_col, cm->sb_size);
     }
   }
 }
@@ -2529,21 +2532,32 @@
   }
 }
 
-static void write_tile_info(VP10_COMMON *const cm,
+static void write_tile_info(const VP10_COMMON *const cm,
                             struct vpx_write_bit_buffer *wb) {
 #if CONFIG_EXT_TILE
-  // TODO(geza.lore): Dependent on CU_SIZE
   const int tile_width  =
-            mi_cols_aligned_to_sb(cm->tile_width) >> MAX_MIB_SIZE_LOG2;
+    ALIGN_POWER_OF_TWO(cm->tile_width, cm->mib_size_log2) >> cm->mib_size_log2;
   const int tile_height =
-            mi_cols_aligned_to_sb(cm->tile_height) >> MAX_MIB_SIZE_LOG2;
+    ALIGN_POWER_OF_TWO(cm->tile_height, cm->mib_size_log2) >> cm->mib_size_log2;
 
-  assert(tile_width > 0 && tile_width <= 64);
-  assert(tile_height > 0 && tile_height <= 64);
+  assert(tile_width > 0);
+  assert(tile_height > 0);
 
   // Write the tile sizes
-  vpx_wb_write_literal(wb, tile_width - 1, 6);
-  vpx_wb_write_literal(wb, tile_height - 1, 6);
+#if CONFIG_EXT_PARTITION
+  if (cm->sb_size == BLOCK_128X128) {
+    assert(tile_width <= 32);
+    assert(tile_height <= 32);
+    vpx_wb_write_literal(wb, tile_width - 1, 5);
+    vpx_wb_write_literal(wb, tile_height - 1, 5);
+  } else
+#endif  // CONFIG_EXT_PARTITION
+  {
+    assert(tile_width <= 64);
+    assert(tile_height <= 64);
+    vpx_wb_write_literal(wb, tile_width - 1, 6);
+    vpx_wb_write_literal(wb, tile_height - 1, 6);
+  }
 #else
   int min_log2_tile_cols, max_log2_tile_cols, ones;
   vp10_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
@@ -2660,7 +2674,7 @@
                            uint8_t *const dst,
                            unsigned int *max_tile_size,
                            unsigned int *max_tile_col_size) {
-  VP10_COMMON *const cm = &cpi->common;
+  const VP10_COMMON *const cm = &cpi->common;
   vp10_writer mode_bc;
 #if CONFIG_ANS
   struct AnsCoder token_ans;
@@ -2994,6 +3008,15 @@
 
   vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2);
 
+  assert(cm->mib_size == num_8x8_blocks_wide_lookup[cm->sb_size]);
+  assert(cm->mib_size == 1 << cm->mib_size_log2);
+#if CONFIG_EXT_PARTITION
+  assert(cm->sb_size == BLOCK_128X128 || cm->sb_size == BLOCK_64X64);
+  vpx_wb_write_bit(wb, cm->sb_size == BLOCK_128X128 ? 1 : 0);
+#else
+  assert(cm->sb_size == BLOCK_64X64);
+#endif  // CONFIG_EXT_PARTITION
+
   encode_loopfilter(cm, wb);
 #if CONFIG_LOOP_RESTORATION
   encode_restoration(cm, wb);
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 38c2566..6aba475 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -834,8 +834,8 @@
   const uint8_t *d;
   int sp;
   int dp;
-  int pixels_wide = 8 * num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
-  int pixels_high = 8 * num_8x8_blocks_high_lookup[BLOCK_LARGEST];
+  int pixels_wide = 8 * num_8x8_blocks_wide_lookup[cm->sb_size];
+  int pixels_high = 8 * num_8x8_blocks_high_lookup[cm->sb_size];
   int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
       cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]};
 
@@ -850,7 +850,7 @@
   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
     const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map :
                                                     cm->last_frame_seg_map;
-    segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col);
+    segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col);
 
     if (cyclic_refresh_segment_id_boosted(segment_id)) {
       int q = vp10_get_qindex(&cm->seg, segment_id, cm->base_qindex);
@@ -863,7 +863,7 @@
   exit(-1);
 #endif  // CONFIG_EXT_PARTITION
 
-  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_LARGEST);
+  set_offsets(cpi, tile, x, mi_row, mi_col, cm->sb_size);
 
   if (xd->mb_to_right_edge < 0)
     pixels_wide += (xd->mb_to_right_edge >> 3);
@@ -881,19 +881,19 @@
     const YV12_BUFFER_CONFIG *yv12_g = NULL;
     unsigned int y_sad, y_sad_g;
 
-    const int max_mi_block_size = num_8x8_blocks_wide_lookup[BLOCK_LARGEST];
+    const int max_mi_block_size = cm->mib_size;
     const int is_right_edge = mi_col + max_mi_block_size / 2 > cm->mi_cols;
     const int is_left_edge = mi_row + max_mi_block_size / 2 > cm->mi_rows;
     BLOCK_SIZE bsize;
 
     if (is_right_edge && is_left_edge)
-      bsize = get_subsize(BLOCK_LARGEST, PARTITION_SPLIT);
+      bsize = get_subsize(cm->sb_size, PARTITION_SPLIT);
     else if (is_right_edge)
-      bsize = get_subsize(BLOCK_LARGEST, PARTITION_VERT);
+      bsize = get_subsize(cm->sb_size, PARTITION_VERT);
     else if (is_left_edge)
-      bsize = get_subsize(BLOCK_LARGEST, PARTITION_HORZ);
+      bsize = get_subsize(cm->sb_size, PARTITION_HORZ);
     else
-      bsize = BLOCK_LARGEST;
+      bsize = cm->sb_size;
 
     assert(yv12 != NULL);
     yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
@@ -913,7 +913,7 @@
                          &cm->frame_refs[LAST_FRAME - 1].sf);
     mbmi->ref_frame[0] = LAST_FRAME;
     mbmi->ref_frame[1] = NONE;
-    mbmi->sb_type = BLOCK_LARGEST;
+    mbmi->sb_type = cm->sb_size;
     mbmi->mv[0].as_int = 0;
     mbmi->interp_filter = BILINEAR;
 
@@ -928,9 +928,9 @@
       x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv;
     }
 
-    vp10_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_LARGEST);
+    vp10_build_inter_predictors_sb(xd, mi_row, mi_col, cm->sb_size);
 
-    for (i = 1; i <= 2; ++i) {
+    for (i = 1; i < MAX_MB_PLANE; ++i) {
       struct macroblock_plane  *p = &x->plane[i];
       struct macroblockd_plane *pd = &xd->plane[i];
       const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
@@ -951,7 +951,7 @@
     // Don't check on boosted segment for now, as largest is suppressed there.
     if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
       if (!is_right_edge && !is_left_edge) {
-        set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_LARGEST);
+        set_block_size(cpi, x, xd, mi_row, mi_col, cm->sb_size);
         return 0;
       }
     }
@@ -2489,7 +2489,7 @@
 }
 
 // Check to see if the given partition size is allowed for a specified number
-// of 8x8 block rows and columns remaining in the image.
+// of mi block rows and columns remaining in the image.
 // If not then return the largest allowed partition size
 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize,
                                       int rows_left, int cols_left,
@@ -2508,62 +2508,64 @@
   return bsize;
 }
 
-static void set_partial_b64x64_partition(MODE_INFO *mi, int mis,
-    int bh_in, int bw_in, int row8x8_remaining, int col8x8_remaining,
-    BLOCK_SIZE bsize, MODE_INFO **mi_8x8) {
+static void set_partial_sb_partition(const VP10_COMMON *const cm,
+                                     MODE_INFO *mi,
+                                     int bh_in, int bw_in,
+                                     int mi_rows_remaining,
+                                     int mi_cols_remaining,
+                                     BLOCK_SIZE bsize, MODE_INFO **mib) {
   int bh = bh_in;
   int r, c;
-  for (r = 0; r < MAX_MIB_SIZE; r += bh) {
+  for (r = 0; r < cm->mib_size; r += bh) {
     int bw = bw_in;
-    for (c = 0; c < MAX_MIB_SIZE; c += bw) {
-      const int index = r * mis + c;
-      mi_8x8[index] = mi + index;
-      mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize,
-          row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
+    for (c = 0; c < cm->mib_size; c += bw) {
+      const int index = r * cm->mi_stride + c;
+      mib[index] = mi + index;
+      mib[index]->mbmi.sb_type = find_partition_size(bsize,
+          mi_rows_remaining - r, mi_cols_remaining - c, &bh, &bw);
     }
   }
 }
 
-// This function attempts to set all mode info entries in a given SB64
+// This function attempts to set all mode info entries in a given superblock
 // to the same block partition size.
 // However, at the bottom and right borders of the image the requested size
 // may not be allowed in which case this code attempts to choose the largest
 // allowable partition.
 static void set_fixed_partitioning(VP10_COMP *cpi, const TileInfo *const tile,
-                                   MODE_INFO **mi_8x8, int mi_row, int mi_col,
+                                   MODE_INFO **mib, int mi_row, int mi_col,
                                    BLOCK_SIZE bsize) {
   VP10_COMMON *const cm = &cpi->common;
-  const int mis = cm->mi_stride;
-  const int row8x8_remaining = tile->mi_row_end - mi_row;
-  const int col8x8_remaining = tile->mi_col_end - mi_col;
+  const int mi_rows_remaining = tile->mi_row_end - mi_row;
+  const int mi_cols_remaining = tile->mi_col_end - mi_col;
   int block_row, block_col;
-  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
+  MODE_INFO *const mi_upper_left = cm->mi + mi_row * cm->mi_stride + mi_col;
   int bh = num_8x8_blocks_high_lookup[bsize];
   int bw = num_8x8_blocks_wide_lookup[bsize];
 
-  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
+  assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
 
-  // Apply the requested partition size to the SB64 if it is all "in image"
-  if ((col8x8_remaining >= MAX_MIB_SIZE) &&
-      (row8x8_remaining >= MAX_MIB_SIZE)) {
-    for (block_row = 0; block_row < MAX_MIB_SIZE; block_row += bh) {
-      for (block_col = 0; block_col < MAX_MIB_SIZE; block_col += bw) {
-        int index = block_row * mis + block_col;
-        mi_8x8[index] = mi_upper_left + index;
-        mi_8x8[index]->mbmi.sb_type = bsize;
+  // Apply the requested partition size to the SB if it is all "in image"
+  if ((mi_cols_remaining >= cm->mib_size) &&
+      (mi_rows_remaining >= cm->mib_size)) {
+    for (block_row = 0; block_row < cm->mib_size; block_row += bh) {
+      for (block_col = 0; block_col < cm->mib_size; block_col += bw) {
+        int index = block_row * cm->mi_stride + block_col;
+        mib[index] = mi_upper_left + index;
+        mib[index]->mbmi.sb_type = bsize;
       }
     }
   } else {
-    // Else this is a partial SB64.
-    set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
-        col8x8_remaining, bsize, mi_8x8);
+    // Else this is a partial SB.
+    set_partial_sb_partition(cm, mi_upper_left, bh, bw,
+                             mi_rows_remaining, mi_cols_remaining, bsize, mib);
   }
 }
 
 static void rd_use_partition(VP10_COMP *cpi,
                              ThreadData *td,
                              TileDataEnc *tile_data,
-                             MODE_INFO **mi_8x8, TOKENEXTRA **tp,
+                             MODE_INFO **mib, TOKENEXTRA **tp,
                              int mi_row, int mi_col,
                              BLOCK_SIZE bsize,
                              int *rate, int64_t *dist,
@@ -2575,17 +2577,17 @@
   TileInfo *const tile_info = &tile_data->tile_info;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
-  const int mis = cm->mi_stride;
   const int bs = num_8x8_blocks_wide_lookup[bsize];
   const int hbs = bs / 2;
-  int i, pl;
+  int i;
+  const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize);
   const BLOCK_SIZE subsize =  get_subsize(bsize, partition);
   RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
   RD_COST last_part_rdc, none_rdc, chosen_rdc;
   BLOCK_SIZE sub_subsize = BLOCK_4X4;
   int splits_below = 0;
-  BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
+  BLOCK_SIZE bs_type = mib[0]->mbmi.sb_type;
   int do_partition_search = 1;
   PICK_MODE_CONTEXT *ctx = &pc_tree->none;
 #if CONFIG_SUPERTX
@@ -2632,7 +2634,7 @@
       splits_below = 1;
       for (i = 0; i < 4; i++) {
         int jj = i >> 1, ii = i & 0x01;
-        MODE_INFO *this_mi = mi_8x8[jj * hbs * mis + ii * hbs];
+        MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs];
         if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) {
           splits_below = 0;
         }
@@ -2654,8 +2656,6 @@
 #endif
                        bsize, ctx, INT64_MAX);
 
-      pl = partition_plane_context(xd, mi_row, mi_col, bsize);
-
       if (none_rdc.rate < INT_MAX) {
         none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
         none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate,
@@ -2667,7 +2667,7 @@
 
       restore_context(x, &x_ctx, mi_row, mi_col, bsize);
 
-      mi_8x8[0]->mbmi.sb_type = bs_type;
+      mib[0]->mbmi.sb_type = bs_type;
       pc_tree->partitioning = partition;
     }
   }
@@ -2802,7 +2802,7 @@
 
         vp10_rd_cost_init(&tmp_rdc);
         rd_use_partition(cpi, td, tile_data,
-                         mi_8x8 + jj * hbs * mis + ii * hbs, tp,
+                         mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
                          mi_row + y_idx, mi_col + x_idx, subsize,
                          &tmp_rdc.rate, &tmp_rdc.dist,
 #if CONFIG_SUPERTX
@@ -2828,7 +2828,6 @@
       break;
   }
 
-  pl = partition_plane_context(xd, mi_row, mi_col, bsize);
   if (last_part_rdc.rate < INT_MAX) {
     last_part_rdc.rate += cpi->partition_cost[pl][partition];
     last_part_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
@@ -2902,14 +2901,11 @@
         encode_sb(cpi, td, tile_info, tp,  mi_row + y_idx, mi_col + x_idx, 0,
                   split_subsize, pc_tree->split[i]);
 
-      pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
-                                   split_subsize);
       chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
 #if CONFIG_SUPERTX
       chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_SPLIT];
 #endif
     }
-    pl = partition_plane_context(xd, mi_row, mi_col, bsize);
     if (chosen_rdc.rate < INT_MAX) {
       chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
       chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
@@ -2922,7 +2918,7 @@
 
   // If last_part is better set the partitioning to that.
   if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
-    mi_8x8[0]->mbmi.sb_type = bsize;
+    mib[0]->mbmi.sb_type = bsize;
     if (bsize >= BLOCK_8X8)
       pc_tree->partitioning = partition;
     chosen_rdc = last_part_rdc;
@@ -2944,11 +2940,11 @@
 
   // We must have chosen a partitioning and encoding or we'll fail later on.
   // No other opportunities for success.
-  if (bsize == BLOCK_LARGEST)
+  if (bsize == cm->sb_size)
     assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
 
   if (do_recon) {
-    int output_enabled = (bsize == BLOCK_LARGEST);
+    int output_enabled = (bsize == cm->sb_size);
     encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
               pc_tree);
   }
@@ -2972,13 +2968,13 @@
 };
 
 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
-                               BLOCK_8X8,   //                     4x4
-  BLOCK_16X16, BLOCK_16X16,  BLOCK_16X16,   //    4x8,    8x4,     8x8
-  BLOCK_32X32, BLOCK_32X32,  BLOCK_32X32,   //   8x16,   16x8,   16x16
-  BLOCK_64X64, BLOCK_64X64,  BLOCK_64X64,   //  16x32,  32x16,   32x32
-  BLOCK_64X64, BLOCK_64X64,  BLOCK_64X64,   //  32x64,  64x32,   64x64
+                                    BLOCK_8X8,  //                     4x4
+    BLOCK_16X16,   BLOCK_16X16,   BLOCK_16X16,  //    4x8,    8x4,     8x8
+    BLOCK_32X32,   BLOCK_32X32,   BLOCK_32X32,  //   8x16,   16x8,   16x16
+    BLOCK_64X64,   BLOCK_64X64,   BLOCK_64X64,  //  16x32,  32x16,   32x32
+  BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST,  //  32x64,  64x32,   64x64
 #if CONFIG_EXT_PARTITION
-  BLOCK_64X64, BLOCK_64X64, BLOCK_128X128   // 64x128, 128x64, 128x128
+  BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST   // 64x128, 128x64, 128x128
 #endif  // CONFIG_EXT_PARTITION
 };
 
@@ -2996,26 +2992,24 @@
 
 // Look at all the mode_info entries for blocks that are part of this
 // partition and find the min and max values for sb_type.
-// At the moment this is designed to work on a 64x64 SB but could be
+// At the moment this is designed to work on a superblock but could be
 // adjusted to use a size parameter.
 //
 // The min and max are assumed to have been initialized prior to calling this
-// function so repeat calls can accumulate a min and max of more than one sb64.
-static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
+// function so repeat calls can accumulate a min and max of more than one
+// superblock.
+static void get_sb_partition_size_range(const VP10_COMMON *const cm,
+                                        MACROBLOCKD *xd, MODE_INFO **mib,
                                         BLOCK_SIZE *min_block_size,
-                                        BLOCK_SIZE *max_block_size,
-                                        int bs_hist[BLOCK_SIZES]) {
-  int sb_width_in_blocks = MAX_MIB_SIZE;
-  int sb_height_in_blocks  = MAX_MIB_SIZE;
+                                        BLOCK_SIZE *max_block_size) {
   int i, j;
   int index = 0;
 
   // Check the sb_type for each block that belongs to this region.
-  for (i = 0; i < sb_height_in_blocks; ++i) {
-    for (j = 0; j < sb_width_in_blocks; ++j) {
-      MODE_INFO *mi = mi_8x8[index+j];
-      BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0;
-      bs_hist[sb_type]++;
+  for (i = 0; i < cm->mib_size; ++i) {
+    for (j = 0; j < cm->mib_size; ++j) {
+      MODE_INFO *mi = mib[index+j];
+      BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : BLOCK_4X4;
       *min_block_size = VPXMIN(*min_block_size, sb_type);
       *max_block_size = VPXMAX(*max_block_size, sb_type);
     }
@@ -3034,12 +3028,11 @@
   MODE_INFO **mi = xd->mi;
   const int left_in_image = xd->left_available && mi[-1];
   const int above_in_image = xd->up_available && mi[-xd->mi_stride];
-  const int row8x8_remaining = tile->mi_row_end - mi_row;
-  const int col8x8_remaining = tile->mi_col_end - mi_col;
+  const int mi_rows_remaining = tile->mi_row_end - mi_row;
+  const int mi_cols_remaining = tile->mi_col_end - mi_col;
   int bh, bw;
   BLOCK_SIZE min_size = BLOCK_4X4;
   BLOCK_SIZE max_size = BLOCK_LARGEST;
-  int bs_hist[BLOCK_SIZES] = {0};
 
   // Trap case where we do not have a prediction.
   if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
@@ -3053,19 +3046,17 @@
     if (cm->frame_type != KEY_FRAME) {
       MODE_INFO **prev_mi =
           &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
-      get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist);
+      get_sb_partition_size_range(cm, xd, prev_mi, &min_size, &max_size);
     }
-    // Find the min and max partition sizes used in the left SB64
+    // Find the min and max partition sizes used in the left superblock
     if (left_in_image) {
-      MODE_INFO **left_sb64_mi = &mi[-MAX_MIB_SIZE];
-      get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size,
-                                  bs_hist);
+      MODE_INFO **left_sb_mi = &mi[-cm->mib_size];
+      get_sb_partition_size_range(cm, xd, left_sb_mi, &min_size, &max_size);
     }
-    // Find the min and max partition sizes used in the above SB64.
+    // Find the min and max partition sizes used in the above superblock.
     if (above_in_image) {
-      MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MAX_MIB_SIZE];
-      get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size,
-                                  bs_hist);
+      MODE_INFO **above_sb_mi = &mi[-xd->mi_stride * cm->mib_size];
+      get_sb_partition_size_range(cm, xd, above_sb_mi, &min_size, &max_size);
     }
 
     // Adjust observed min and max for "relaxed" auto partition case.
@@ -3076,29 +3067,28 @@
   }
 
   // Check border cases where max and min from neighbors may not be legal.
-  max_size = find_partition_size(max_size,
-                                 row8x8_remaining, col8x8_remaining,
+  max_size = find_partition_size(max_size, mi_rows_remaining, mi_cols_remaining,
                                  &bh, &bw);
+  min_size = VPXMIN(min_size, max_size);
+
   // Test for blocks at the edge of the active image.
   // This may be the actual edge of the image or where there are formatting
   // bars.
   if (vp10_active_edge_sb(cpi, mi_row, mi_col)) {
     min_size = BLOCK_4X4;
   } else {
-    min_size =
-        VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size));
+    min_size = VPXMIN(cpi->sf.rd_auto_partition_min_limit, min_size);
   }
 
   // When use_square_partition_only is true, make sure at least one square
   // partition is allowed by selecting the next smaller square size as
   // *min_block_size.
-  if (cpi->sf.use_square_partition_only &&
-      next_square_size[max_size] < min_size) {
-     min_size = next_square_size[max_size];
+  if (cpi->sf.use_square_partition_only) {
+    min_size = VPXMIN(min_size, next_square_size[max_size]);
   }
 
-  *min_block_size = min_size;
-  *max_block_size = max_size;
+  *min_block_size = VPXMIN(min_size, cm->sb_size);
+  *max_block_size = VPXMIN(max_size, cm->sb_size);
 }
 
 // TODO(jingning) refactor functions setting partition search range
@@ -3151,8 +3141,8 @@
     max_size = max_partition_size[max_size];
   }
 
-  *min_bs = min_size;
-  *max_bs = max_size;
+  *min_bs = VPXMIN(min_size, cm->sb_size);
+  *max_bs = VPXMIN(max_size, cm->sb_size);
 }
 
 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
@@ -4196,12 +4186,12 @@
 
   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
       pc_tree->index != 3) {
-    int output_enabled = (bsize == BLOCK_LARGEST);
+    int output_enabled = (bsize == cm->sb_size);
     encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
               bsize, pc_tree);
   }
 
-  if (bsize == BLOCK_LARGEST) {
+  if (bsize == cm->sb_size) {
     assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip));
     assert(best_rdc.rate < INT_MAX);
     assert(best_rdc.dist < INT64_MAX);
@@ -4232,7 +4222,7 @@
 
   // Code each SB in the row
   for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
-       mi_col += MAX_MIB_SIZE) {
+       mi_col += cm->mib_size) {
     const struct segmentation *const seg = &cm->seg;
     int dummy_rate;
     int64_t dummy_dist;
@@ -4264,29 +4254,29 @@
     if (seg->enabled) {
       const uint8_t *const map = seg->update_map ? cpi->segmentation_map
                                                  : cm->last_frame_seg_map;
-      int segment_id = get_segment_id(cm, map, BLOCK_LARGEST, mi_row, mi_col);
+      int segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col);
       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
     }
 
     x->source_variance = UINT_MAX;
     if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
-      const BLOCK_SIZE bsize =
-          seg_skip ? BLOCK_LARGEST : sf->always_this_block_size;
-      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
+      BLOCK_SIZE bsize;
+      set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size);
+      bsize = seg_skip ? cm->sb_size : sf->always_this_block_size;
       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
-                       BLOCK_LARGEST, &dummy_rate, &dummy_dist,
+                       cm->sb_size, &dummy_rate, &dummy_dist,
 #if CONFIG_SUPERTX
                        &dummy_rate_nocoef,
 #endif  // CONFIG_SUPERTX
                        1, td->pc_root);
     } else if (cpi->partition_search_skippable_frame) {
       BLOCK_SIZE bsize;
-      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
+      set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size);
       bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
-                       BLOCK_LARGEST, &dummy_rate, &dummy_dist,
+                       cm->sb_size, &dummy_rate, &dummy_dist,
 #if CONFIG_SUPERTX
                        &dummy_rate_nocoef,
 #endif  // CONFIG_SUPERTX
@@ -4295,7 +4285,7 @@
                cm->frame_type != KEY_FRAME) {
       choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
-                       BLOCK_LARGEST, &dummy_rate, &dummy_dist,
+                       cm->sb_size, &dummy_rate, &dummy_dist,
 #if CONFIG_SUPERTX
                        &dummy_rate_nocoef,
 #endif  // CONFIG_SUPERTX
@@ -4303,17 +4293,19 @@
     } else {
       // If required set upper and lower partition size limits
       if (sf->auto_min_max_partition_size) {
-        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_LARGEST);
+        set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->sb_size);
         rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
                                 &x->min_partition_size,
                                 &x->max_partition_size);
       }
-      rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_LARGEST,
+      rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size,
                         &dummy_rdc,
 #if CONFIG_SUPERTX
                         &dummy_rate_nocoef,
 #endif  // CONFIG_SUPERTX
-                        INT64_MAX, td->pc_root);
+                        INT64_MAX,
+                        cm->sb_size == BLOCK_LARGEST ? td->pc_root
+                                                     : td->pc_root->split[0]);
     }
   }
 #if CONFIG_ENTROPY
@@ -4469,7 +4461,7 @@
   td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
 
   for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
-       mi_row += MAX_MIB_SIZE) {
+       mi_row += cm->mib_size) {
     encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
   }
 
@@ -4512,6 +4504,9 @@
   RD_COUNTS *const rdc = &cpi->td.rd_counts;
   int i;
 
+  x->min_partition_size = VPXMIN(x->min_partition_size, cm->sb_size);
+  x->max_partition_size = VPXMIN(x->max_partition_size, cm->sb_size);
+
   xd->mi = cm->mi_grid_visible;
   xd->mi[0] = cm->mi;
 
@@ -4737,7 +4732,6 @@
     }
 #endif
   } else {
-    cm->reference_mode = SINGLE_REFERENCE;
     encode_frame_internal(cpi);
   }
 }
diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c
index 5af0684..a39575b 100644
--- a/vp10/encoder/encoder.c
+++ b/vp10/encoder/encoder.c
@@ -248,6 +248,29 @@
 #endif
 }
 
+static BLOCK_SIZE select_sb_size(const VP10_COMP *const cpi) {
+#if CONFIG_EXT_PARTITION
+  if (cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_64X64)
+    return BLOCK_64X64;
+
+  if (cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_128X128)
+    return BLOCK_128X128;
+
+  assert(cpi->oxcf.superblock_size == VPX_SUPERBLOCK_SIZE_DYNAMIC);
+
+  assert(IMPLIES(cpi->common.tile_cols > 1,
+                 cpi->common.tile_width % MAX_MIB_SIZE == 0));
+  assert(IMPLIES(cpi->common.tile_rows > 1,
+                 cpi->common.tile_height % MAX_MIB_SIZE == 0));
+
+  // TODO(any): Possibly could improve this with a heuristic.
+  return BLOCK_128X128;
+#else
+  (void)cpi;
+  return BLOCK_64X64;
+#endif  //  CONFIG_EXT_PARTITION
+}
+
 static void setup_frame(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
   // Set up entropy context depending on frame type. The decoder mandates
@@ -269,6 +292,8 @@
     *cm->fc = cm->frame_contexts[cm->frame_context_idx];
     vp10_zero(cpi->interp_filter_selected[0]);
   }
+
+  set_sb_size(cm, select_sb_size(cpi));
 }
 
 static void vp10_enc_setup_mi(VP10_COMMON *cm) {
@@ -786,15 +811,31 @@
   vp10_rc_update_framerate(cpi);
 }
 
-static void set_tile_limits(VP10_COMP *cpi) {
+static void set_tile_info(VP10_COMP *cpi) {
   VP10_COMMON *const cm = &cpi->common;
+
 #if CONFIG_EXT_TILE
-  cm->tile_width  = clamp(cpi->oxcf.tile_columns, 1, 64) << MAX_MIB_SIZE_LOG2;
-  cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64) << MAX_MIB_SIZE_LOG2;
+#if CONFIG_EXT_PARTITION
+  if (cpi->oxcf.superblock_size != VPX_SUPERBLOCK_SIZE_64X64) {
+    cm->tile_width  = clamp(cpi->oxcf.tile_columns, 1, 32);
+    cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 32);
+    cm->tile_width  <<= MAX_MIB_SIZE_LOG2;
+    cm->tile_height <<= MAX_MIB_SIZE_LOG2;
+  } else
+#endif  // CONFIG_EXT_PARTITION
+  {
+    cm->tile_width  = clamp(cpi->oxcf.tile_columns, 1, 64);
+    cm->tile_height = clamp(cpi->oxcf.tile_rows, 1, 64);
+    cm->tile_width  <<= MAX_MIB_SIZE_LOG2 - 1;
+    cm->tile_height <<= MAX_MIB_SIZE_LOG2 - 1;
+  }
 
   cm->tile_width  = VPXMIN(cm->tile_width, cm->mi_cols);
   cm->tile_height = VPXMIN(cm->tile_height, cm->mi_rows);
 
+  assert((cm->tile_width >> MAX_MIB_SIZE_LOG2) <= 32);
+  assert((cm->tile_height >> MAX_MIB_SIZE_LOG2) <= 32);
+
   // Get the number of tiles
   cm->tile_cols = 1;
   while (cm->tile_cols * cm->tile_width < cm->mi_cols)
@@ -814,11 +855,14 @@
   cm->tile_cols = 1 << cm->log2_tile_cols;
   cm->tile_rows = 1 << cm->log2_tile_rows;
 
-  cm->tile_width = (mi_cols_aligned_to_sb(cm->mi_cols) >> cm->log2_tile_cols);
-  cm->tile_height = (mi_cols_aligned_to_sb(cm->mi_rows) >> cm->log2_tile_rows);
-  // round to integer multiples of 8
-  cm->tile_width  = mi_cols_aligned_to_sb(cm->tile_width);
-  cm->tile_height = mi_cols_aligned_to_sb(cm->tile_height);
+  cm->tile_width = ALIGN_POWER_OF_TWO(cm->mi_cols, MAX_MIB_SIZE_LOG2);
+  cm->tile_width >>= cm->log2_tile_cols;
+  cm->tile_height = ALIGN_POWER_OF_TWO(cm->mi_rows, MAX_MIB_SIZE_LOG2);
+  cm->tile_height >>= cm->log2_tile_rows;
+
+  // round to integer multiples of max superblock size
+  cm->tile_width  = ALIGN_POWER_OF_TWO(cm->tile_width, MAX_MIB_SIZE_LOG2);
+  cm->tile_height = ALIGN_POWER_OF_TWO(cm->tile_height, MAX_MIB_SIZE_LOG2);
 #endif  // CONFIG_EXT_TILE
 }
 
@@ -832,7 +876,7 @@
   memset(cpi->mbmi_ext_base, 0,
          cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
 
-  set_tile_limits(cpi);
+  set_tile_info(cpi);
 }
 
 static void init_buffer_indices(VP10_COMP *cpi) {
@@ -2015,7 +2059,7 @@
   cpi->last_frame_distortion = 0;
 #endif
 
-  set_tile_limits(cpi);
+  set_tile_info(cpi);
 
   cpi->ext_refresh_frame_flags_pending = 0;
   cpi->ext_refresh_frame_context_pending = 0;
@@ -3699,8 +3743,7 @@
   setup_frame(cpi);
 
 #if CONFIG_ENTROPY
-  cm->do_subframe_update =
-      cm->log2_tile_cols == 0 && cm->log2_tile_rows == 0;
+  cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1;
   vp10_copy(cm->starting_coef_probs, cm->fc->coef_probs);
   vp10_copy(cpi->subframe_stats.enc_starting_coef_probs,
             cm->fc->coef_probs);
@@ -3827,8 +3870,7 @@
 #endif  // CONFIG_ENTROPY
 
 #if CONFIG_ENTROPY
-    cm->do_subframe_update =
-        cm->log2_tile_cols == 0 && cm->log2_tile_rows == 0;
+    cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1;
     if (loop_count == 0 || frame_is_intra_only(cm) ||
         cm->error_resilient_mode) {
       vp10_copy(cm->starting_coef_probs, cm->fc->coef_probs);
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index ddc20f0..2098378 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -235,6 +235,10 @@
   int color_range;
   int render_width;
   int render_height;
+
+#if CONFIG_EXT_PARTITION
+  vpx_superblock_size_t superblock_size;
+#endif  // CONFIG_EXT_PARTITION
 } VP10EncoderConfig;
 
 static INLINE int is_lossless_requested(const VP10EncoderConfig *cfg) {
@@ -418,7 +422,7 @@
   // clips, and 300 for < HD clips.
   int encode_breakout;
 
-  unsigned char *segmentation_map;
+  uint8_t *segmentation_map;
 
   // segment threashold for encode breakout
   int  segment_encode_breakout[MAX_SEGMENTS];
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 446b54d..ce9fad7 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -935,14 +935,15 @@
   memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
 }
 
-void vp10_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
-                               int bsize, int best_mode_index) {
+void vp10_update_rd_thresh_fact(const VP10_COMMON *const cm,
+                                int (*factor_buf)[MAX_MODES], int rd_thresh,
+                                int bsize, int best_mode_index) {
   if (rd_thresh > 0) {
     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
     int mode;
     for (mode = 0; mode < top_mode; ++mode) {
       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
-      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_LARGEST);
+      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, cm->sb_size);
       BLOCK_SIZE bs;
       for (bs = min_size; bs <= max_size; ++bs) {
         int *const fact = &factor_buf[bs][mode];
diff --git a/vp10/encoder/rd.h b/vp10/encoder/rd.h
index 10be9df..80749dc 100644
--- a/vp10/encoder/rd.h
+++ b/vp10/encoder/rd.h
@@ -337,8 +337,9 @@
 
 void vp10_set_rd_speed_thresholds_sub8x8(struct VP10_COMP *cpi);
 
-void vp10_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh,
-                               int bsize, int best_mode_index);
+void vp10_update_rd_thresh_fact(const VP10_COMMON *const cm,
+                                int (*fact)[MAX_MODES], int rd_thresh,
+                                int bsize, int best_mode_index);
 
 #if CONFIG_ENTROPY
 void fill_token_costs(vp10_coeff_cost *c,
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 980155b..918ad3e 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -5752,10 +5752,9 @@
     step_param = cpi->mv_step_param;
   }
 
-  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST) {
-    int boffset =
-        2 * (b_width_log2_lookup[BLOCK_LARGEST] -
-             VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
+  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
+    int boffset =  2 * (b_width_log2_lookup[cm->sb_size] -
+         VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
     step_param = VPXMAX(step_param, boffset);
   }
 
@@ -5926,9 +5925,9 @@
   }
 
   // TODO(debargha): is show_frame needed here?
-  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_LARGEST &&
+  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size &&
       cm->show_frame) {
-    int boffset = 2 * (b_width_log2_lookup[BLOCK_LARGEST] -
+    int boffset = 2 * (b_width_log2_lookup[cm->sb_size] -
           VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
     step_param = VPXMAX(step_param, boffset);
   }
@@ -7460,8 +7459,8 @@
 // bars embedded in the stream.
 int vp10_active_edge_sb(VP10_COMP *cpi,
                        int mi_row, int mi_col) {
-  return vp10_active_h_edge(cpi, mi_row, MAX_MIB_SIZE) ||
-         vp10_active_v_edge(cpi, mi_col, MAX_MIB_SIZE);
+  return vp10_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
+         vp10_active_v_edge(cpi, mi_col, cpi->common.mib_size);
 }
 
 static void restore_uv_color_map(VP10_COMP *cpi, MACROBLOCK *x) {
@@ -8952,8 +8951,8 @@
          !is_inter_block(&best_mbmode));
 
   if (!cpi->rc.is_src_frame_alt_ref)
-    vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
-                              sf->adaptive_rd_thresh, bsize, best_mode_index);
+    vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
+                               sf->adaptive_rd_thresh, bsize, best_mode_index);
 
   // macroblock modes
   *mbmi = best_mbmode;
@@ -9099,8 +9098,8 @@
   assert((cm->interp_filter == SWITCHABLE) ||
          (cm->interp_filter == mbmi->interp_filter));
 
-  vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
-                            cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
+  vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
+                             cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
 
   vp10_zero(best_pred_diff);
 
@@ -9821,8 +9820,8 @@
          (cm->interp_filter == best_mbmode.interp_filter) ||
          !is_inter_block(&best_mbmode));
 
-  vp10_update_rd_thresh_fact(tile_data->thresh_freq_fact,
-                            sf->adaptive_rd_thresh, bsize, best_ref_index);
+  vp10_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
+                             sf->adaptive_rd_thresh, bsize, best_ref_index);
 
   // macroblock modes
   *mbmi = best_mbmode;
diff --git a/vp10/encoder/segmentation.c b/vp10/encoder/segmentation.c
index 8628b99..f3fa210 100644
--- a/vp10/encoder/segmentation.c
+++ b/vp10/encoder/segmentation.c
@@ -327,13 +327,13 @@
       mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride +
                  tile_info.mi_col_start;
       for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
-           mi_row += MAX_MIB_SIZE, mi_ptr += MAX_MIB_SIZE * cm->mi_stride) {
+           mi_row += cm->mib_size, mi_ptr += cm->mib_size * cm->mi_stride) {
         MODE_INFO **mi = mi_ptr;
         for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
-             mi_col += MAX_MIB_SIZE, mi += MAX_MIB_SIZE) {
+             mi_col += cm->mib_size, mi += cm->mib_size) {
           count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
                         temporal_predictor_count, t_unpred_seg_counts,
-                        mi_row, mi_col, BLOCK_LARGEST);
+                        mi_row, mi_col, cm->sb_size);
         }
       }
     }
diff --git a/vp10/encoder/speed_features.h b/vp10/encoder/speed_features.h
index ea4df6e..6ba074d 100644
--- a/vp10/encoder/speed_features.h
+++ b/vp10/encoder/speed_features.h
@@ -319,8 +319,8 @@
   // Disable testing non square partitions. (eg 16x32)
   int use_square_partition_only;
 
-  // Sets min and max partition sizes for this 64x64 region based on the
-  // same 64x64 in last encoded frame, and the left and above neighbor.
+  // Sets min and max partition sizes for this superblock based on the
+  // same superblock in last encoded frame, and the left and above neighbor.
   AUTO_MIN_MAX_MODE auto_min_max_partition_size;
   // Ensures the rd based auto partition search will always
   // go down at least to the specified level.
diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c
index 047fcfb..0cad961 100644
--- a/vp10/vp10_cx_iface.c
+++ b/vp10/vp10_cx_iface.c
@@ -49,40 +49,42 @@
   int                         color_range;
   int                         render_width;
   int                         render_height;
+  vpx_superblock_size_t       superblock_size;
 };
 
 static struct vp10_extracfg default_extra_cfg = {
-  0,                          // cpu_used
-  1,                          // enable_auto_alt_ref
-  0,                          // noise_sensitivity
-  0,                          // sharpness
-  0,                          // static_thresh
+  0,                            // cpu_used
+  1,                            // enable_auto_alt_ref
+  0,                            // noise_sensitivity
+  0,                            // sharpness
+  0,                            // static_thresh
 #if CONFIG_EXT_TILE
-  64,                         // tile_columns
-  64,                         // tile_rows
+  UINT_MAX,                     // tile_columns
+  UINT_MAX,                     // tile_rows
 #else
-  0,                          // tile_columns
-  0,                          // tile_rows
+  0,                            // tile_columns
+  0,                            // tile_rows
 #endif  // CONFIG_EXT_TILE
-  7,                          // arnr_max_frames
-  5,                          // arnr_strength
-  0,                          // min_gf_interval; 0 -> default decision
-  0,                          // max_gf_interval; 0 -> default decision
-  VP8_TUNE_PSNR,              // tuning
-  10,                         // cq_level
-  0,                          // rc_max_intra_bitrate_pct
-  0,                          // rc_max_inter_bitrate_pct
-  0,                          // gf_cbr_boost_pct
-  0,                          // lossless
-  1,                          // frame_parallel_decoding_mode
-  NO_AQ,                      // aq_mode
-  0,                          // frame_periodic_delta_q
-  VPX_BITS_8,                 // Bit depth
-  VP9E_CONTENT_DEFAULT,       // content
-  VPX_CS_UNKNOWN,             // color space
-  0,                          // color range
-  0,                          // render width
-  0,                          // render height
+  7,                            // arnr_max_frames
+  5,                            // arnr_strength
+  0,                            // min_gf_interval; 0 -> default decision
+  0,                            // max_gf_interval; 0 -> default decision
+  VP8_TUNE_PSNR,                // tuning
+  10,                           // cq_level
+  0,                            // rc_max_intra_bitrate_pct
+  0,                            // rc_max_inter_bitrate_pct
+  0,                            // gf_cbr_boost_pct
+  0,                            // lossless
+  1,                            // frame_parallel_decoding_mode
+  NO_AQ,                        // aq_mode
+  0,                            // frame_periodic_delta_q
+  VPX_BITS_8,                   // Bit depth
+  VP9E_CONTENT_DEFAULT,         // content
+  VPX_CS_UNKNOWN,               // color space
+  0,                            // color range
+  0,                            // render width
+  0,                            // render height
+  VPX_SUPERBLOCK_SIZE_DYNAMIC   // superblock_size
 };
 
 struct vpx_codec_alg_priv {
@@ -199,12 +201,26 @@
   RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
   RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
   RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
+  RANGE_CHECK(extra_cfg, superblock_size,
+              VPX_SUPERBLOCK_SIZE_64X64, VPX_SUPERBLOCK_SIZE_DYNAMIC);
 #if CONFIG_EXT_TILE
   // TODO(any): Waring. If CONFIG_EXT_TILE is true, tile_columns really
   // means tile_width, and tile_rows really means tile_hight. The interface
   // should be sanitized.
-  RANGE_CHECK(extra_cfg, tile_columns, 1, 64);
-  RANGE_CHECK(extra_cfg, tile_rows, 1, 64);
+#if CONFIG_EXT_PARTITION
+  if (extra_cfg->superblock_size != VPX_SUPERBLOCK_SIZE_64X64) {
+    if (extra_cfg->tile_columns != UINT_MAX)
+      RANGE_CHECK(extra_cfg, tile_columns, 1, 32);
+    if (extra_cfg->tile_rows != UINT_MAX)
+      RANGE_CHECK(extra_cfg, tile_rows, 1, 32);
+  } else
+#endif  // CONFIG_EXT_PARTITION
+  {
+    if (extra_cfg->tile_columns != UINT_MAX)
+      RANGE_CHECK(extra_cfg, tile_columns, 1, 64);
+    if (extra_cfg->tile_rows != UINT_MAX)
+      RANGE_CHECK(extra_cfg, tile_rows, 1, 64);
+  }
 #else
   RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
   RANGE_CHECK(extra_cfg, tile_rows, 0, 2);
@@ -416,8 +432,25 @@
   oxcf->tuning = extra_cfg->tuning;
   oxcf->content = extra_cfg->content;
 
+#if CONFIG_EXT_PARTITION
+  oxcf->superblock_size = extra_cfg->superblock_size;
+#endif  // CONFIG_EXT_PARTITION
+
+#if CONFIG_EXT_TILE
+  {
+#if CONFIG_EXT_PARTITION
+    const unsigned int max =
+      extra_cfg->superblock_size == VPX_SUPERBLOCK_SIZE_64X64 ? 64 : 32;
+#else
+    const unsigned int max = 64;
+#endif  // CONFIG_EXT_PARTITION
+    oxcf->tile_columns = VPXMIN(extra_cfg->tile_columns, max);
+    oxcf->tile_rows    = VPXMIN(extra_cfg->tile_rows, max);
+  }
+#else
   oxcf->tile_columns = extra_cfg->tile_columns;
   oxcf->tile_rows    = extra_cfg->tile_rows;
+#endif  // CONFIG_EXT_TILE
 
   oxcf->error_resilient_mode         = cfg->g_error_resilient;
   oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode;
@@ -1247,6 +1280,13 @@
   return update_extra_cfg(ctx, &extra_cfg);
 }
 
+static vpx_codec_err_t ctrl_set_superblock_size(vpx_codec_alg_priv_t *ctx,
+                                                va_list args) {
+  struct vp10_extracfg extra_cfg = ctx->extra_cfg;  // edit a local copy
+  extra_cfg.superblock_size = CAST(VP10E_SET_SUPERBLOCK_SIZE, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
 static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
   {VP8_COPY_REFERENCE,                ctrl_copy_reference},
   {VP8E_USE_REFERENCE,                ctrl_use_reference},
@@ -1283,6 +1323,7 @@
   {VP9E_SET_MIN_GF_INTERVAL,          ctrl_set_min_gf_interval},
   {VP9E_SET_MAX_GF_INTERVAL,          ctrl_set_max_gf_interval},
   {VP9E_SET_RENDER_SIZE,              ctrl_set_render_size},
+  {VP10E_SET_SUPERBLOCK_SIZE,         ctrl_set_superblock_size},
 
   // Getters
   {VP8E_GET_LAST_QUANTIZER,           ctrl_get_quantizer},
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index d9764a4..1306481 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -560,6 +560,15 @@
    * Supported in codecs: VP9
    */
   VP9E_SET_RENDER_SIZE,
+
+  /*!\brief Codec control function to set intended superblock size.
+   *
+   * Takes a vpx_superblock_size_t value. With VPX_SUPERBLOCK_SIZE_DYNAMIC (the
+   * default), the encoder determines the superblock size for each frame.
+   *
+   * Supported in codecs: VP10
+   */
+  VP10E_SET_SUPERBLOCK_SIZE,
 };
 
 /*!\brief vpx 1-D scaling mode
@@ -820,6 +829,9 @@
  */
 #define VPX_CTRL_VP9E_SET_RENDER_SIZE
 VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
+
+VPX_CTRL_USE_TYPE(VP10E_SET_SUPERBLOCK_SIZE, unsigned int)
+#define VPX_CTRL_VP10E_SET_SUPERBLOCK_SIZE
 /*!\endcond */
 /*! @} - end defgroup vp8_encoder */
 #ifdef __cplusplus
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index b6037bb..e65e3f4 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -222,6 +222,18 @@
     VPX_BITS_12 = 12,  /**< 12 bits */
   } vpx_bit_depth_t;
 
+  /*!\brief Superblock size selection.
+   *
+   * Defines the superblock size used for encoding. The superblock size can
+   * either be fixed at 64x64 or 128x128 pixels, or it can be dynamically
+   * selected by the encoder for each frame.
+   */
+  typedef enum vpx_superblock_size {
+    VPX_SUPERBLOCK_SIZE_64X64,    /**< Always use 64x64 superblocks. */
+    VPX_SUPERBLOCK_SIZE_128X128,  /**< Always use 128x128 superblocks. */
+    VPX_SUPERBLOCK_SIZE_DYNAMIC   /**< Select superblock size dynamically. */
+  } vpx_superblock_size_t;
+
   /*
    * Library Version Number Interface
    *
diff --git a/vpx_dsp/vpx_dsp_common.h b/vpx_dsp/vpx_dsp_common.h
index e127031..3571eea 100644
--- a/vpx_dsp/vpx_dsp_common.h
+++ b/vpx_dsp/vpx_dsp_common.h
@@ -30,6 +30,8 @@
 #define VPXMIN(x, y) (((x) < (y)) ? (x) : (y))
 #define VPXMAX(x, y) (((x) > (y)) ? (x) : (y))
 
+#define IMPLIES(a, b)  (!(a) || (b))  // 'a implies b'; b evaluated iff a != 0
+
 // These can be used to give a hint about branch outcomes.
 // This can have an effect, even if your target processor has a
 // good branch predictor, as these hints can affect basic block
diff --git a/vpxenc.c b/vpxenc.c
index 5e14934..ca29816 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -476,6 +476,17 @@
 #endif
 
 #if CONFIG_VP10_ENCODER
+#if CONFIG_EXT_PARTITION
+static const struct arg_enum_list superblock_size_enum[] = {
+  {"dynamic", VPX_SUPERBLOCK_SIZE_DYNAMIC},
+  {"64", VPX_SUPERBLOCK_SIZE_64X64},
+  {"128", VPX_SUPERBLOCK_SIZE_128X128},
+  {NULL, 0}
+};
+static const arg_def_t superblock_size = ARG_DEF_ENUM(
+    NULL, "sb-size", 1, "Superblock size to use", superblock_size_enum);
+#endif  // CONFIG_EXT_PARTITION
+
 static const arg_def_t *vp10_args[] = {
   &cpu_used_vp9, &auto_altref, &sharpness, &static_thresh,
   &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type,
@@ -484,6 +495,9 @@
   &frame_parallel_decoding, &aq_mode, &frame_periodic_boost,
   &noise_sens, &tune_content, &input_color_space,
   &min_gf_interval, &max_gf_interval,
+#if CONFIG_EXT_PARTITION
+  &superblock_size,
+#endif  // CONFIG_EXT_PARTITION
 #if CONFIG_VP9_HIGHBITDEPTH
   &bitdeptharg, &inbitdeptharg,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -500,6 +514,9 @@
   VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY,
   VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE,
   VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL,
+#if CONFIG_EXT_PARTITION
+  VP10E_SET_SUPERBLOCK_SIZE,
+#endif  // CONFIG_EXT_PARTITION
   0
 };
 #endif