Add options to set the number of tile groups or the MTU size.

The default MTU size is 0, which means a fixed number of
tile groups is used. A non-zero MTU size overrides the
fixed tile group count. MTU matching will succeed unless
a single tile is bigger than the MTU, in which case the
tile group holding that tile exceeds the MTU.

--mtu-size is given in bytes and includes headers, but
not transport/wrapper overheads.

Change-Id: I2b70bd41b175b54273b02d836f2a84011f617a7c
diff --git a/aom/aomcx.h b/aom/aomcx.h
index f3edd79..067da55 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -358,6 +358,29 @@
   AV1E_SET_QM_MAX,
 #endif
 
+#if CONFIG_TILE_GROUPS
+  /*!\brief Codec control function to set a maximum number of tile groups.
+   *
+   * This will set the maximum number of tile groups. This will be
+   * overridden if an MTU size is set. The default value is 1.
+   *
+   * Supported in codecs: AV1
+   */
+  AV1E_SET_NUM_TG,
+
+  /*!\brief Codec control function to set an MTU size for a tile group.
+   *
+   * This will set the maximum number of bytes in a tile group. This can be
+   * exceeded only if a single tile is larger than this amount.
+   *
+   * By default, the value is 0, in which case a fixed number of tile groups
+   * is used.
+   *
+   * Supported in codecs: AV1
+   */
+  AV1E_SET_MTU,
+#endif
+
   /*!\brief Codec control function to set number of tile columns.
    *
    * In encoding and decoding, AV1 allows an input image frame be partitioned
@@ -705,6 +728,13 @@
 #define AOM_CTRL_AV1E_SET_QM_MAX
 #endif
 
+#if CONFIG_TILE_GROUPS
+AOM_CTRL_USE_TYPE(AV1E_SET_NUM_TG, unsigned int)
+#define AOM_CTRL_AV1E_SET_NUM_TG
+AOM_CTRL_USE_TYPE(AV1E_SET_MTU, unsigned int)
+#define AOM_CTRL_AV1E_SET_MTU
+#endif
+
 AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_PARALLEL_DECODING, unsigned int)
 #define AOM_CTRL_AV1E_SET_FRAME_PARALLEL_DECODING
 
diff --git a/aomenc.c b/aomenc.c
index c80ee30..9a3fd86 100644
--- a/aomenc.c
+++ b/aomenc.c
@@ -388,6 +388,13 @@
 static const arg_def_t qm_max = ARG_DEF(
     NULL, "qm_max", 1, "Max quant matrix flatness (0..15), default is 16");
 #endif
+#if CONFIG_TILE_GROUPS
+static const arg_def_t num_tg =
+    ARG_DEF(NULL, "num-tile-groups", 1, "Maximum number of tile groups, default is 1");
+static const arg_def_t mtu_size =
+    ARG_DEF(NULL, "mtu-size", 1,
+            "MTU size for a tile group, default is 0 (no MTU targeting), overrides maximum number of tile groups");
+#endif
 static const arg_def_t frame_parallel_decoding =
     ARG_DEF(NULL, "frame-parallel", 1,
             "Enable frame parallel decodability features "
@@ -486,6 +493,10 @@
 #if CONFIG_EXT_PARTITION
                                        &superblock_size,
 #endif  // CONFIG_EXT_PARTITION
+#if CONFIG_TILE_GROUPS
+                                       &num_tg,
+                                       &mtu_size,
+#endif
 #if CONFIG_AOM_HIGHBITDEPTH
                                        &bitdeptharg,
                                        &inbitdeptharg,
@@ -522,6 +533,10 @@
 #if CONFIG_EXT_PARTITION
                                         AV1E_SET_SUPERBLOCK_SIZE,
 #endif  // CONFIG_EXT_PARTITION
+#if CONFIG_TILE_GROUPS
+                                        AV1E_SET_NUM_TG,
+                                        AV1E_SET_MTU,
+#endif
                                         0 };
 #endif
 
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index d63578e..36c987e 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -49,6 +49,10 @@
   unsigned int qm_min;
   unsigned int qm_max;
 #endif
+#if CONFIG_TILE_GROUPS
+  unsigned int num_tg;
+  unsigned int mtu_size;
+#endif
   unsigned int frame_parallel_decoding_mode;
   AQ_MODE aq_mode;
   unsigned int frame_periodic_boost;
@@ -92,6 +96,10 @@
   DEFAULT_QM_FIRST,  // qm_min
   DEFAULT_QM_LAST,   // qm_max
 #endif
+#if CONFIG_TILE_GROUPS
+  1,  // max number of tile groups
+  0,  // mtu_size
+#endif
   1,                           // frame_parallel_decoding_mode
   NO_AQ,                       // aq_mode
   0,                           // frame_periodic_delta_q
@@ -395,6 +403,11 @@
   oxcf->qm_maxlevel = extra_cfg->qm_max;
 #endif
 
+#if CONFIG_TILE_GROUPS
+  oxcf->num_tile_groups = extra_cfg->num_tg;
+  oxcf->mtu = extra_cfg->mtu_size;
+#endif
+
   oxcf->under_shoot_pct = cfg->rc_undershoot_pct;
   oxcf->over_shoot_pct = cfg->rc_overshoot_pct;
 
@@ -720,6 +733,21 @@
 }
 #endif
 
+#if CONFIG_TILE_GROUPS
+static aom_codec_err_t ctrl_set_num_tg(aom_codec_alg_priv_t *ctx,
+                                       va_list args) {
+  struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.num_tg = CAST(AV1E_SET_NUM_TG, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static aom_codec_err_t ctrl_set_mtu(aom_codec_alg_priv_t *ctx, va_list args) {
+  struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.mtu_size = CAST(AV1E_SET_MTU, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+#endif
+
 static aom_codec_err_t ctrl_set_frame_parallel_decoding_mode(
     aom_codec_alg_priv_t *ctx, va_list args) {
   struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -1322,6 +1350,10 @@
   { AV1E_SET_QM_MIN, ctrl_set_qm_min },
   { AV1E_SET_QM_MAX, ctrl_set_qm_max },
 #endif
+#if CONFIG_TILE_GROUPS
+  { AV1E_SET_NUM_TG, ctrl_set_num_tg },
+  { AV1E_SET_MTU, ctrl_set_mtu },
+#endif
   { AV1E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode },
   { AV1E_SET_AQ_MODE, ctrl_set_aq_mode },
   { AV1E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost },
diff --git a/av1/common/tile_common.h b/av1/common/tile_common.h
index d63d260..2e83820 100644
--- a/av1/common/tile_common.h
+++ b/av1/common/tile_common.h
@@ -19,7 +19,7 @@
 struct AV1Common;
 
 #if CONFIG_TILE_GROUPS
-#define MAX_NUM_TG 3
+#define DEFAULT_MAX_NUM_TG 3
 #endif
 
 typedef struct TileInfo {
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index f9faa1c..2f1c9a1 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -3375,6 +3375,7 @@
   size_t total_size = 0;
   const int tile_cols = cm->tile_cols;
   const int tile_rows = cm->tile_rows;
+  unsigned int tile_size = 0;
 #if CONFIG_TILE_GROUPS
   const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols;
   const int have_tiles = n_log2_tiles > 0;
@@ -3388,6 +3389,9 @@
   struct aom_write_bit_buffer comp_hdr_len_wb;
   struct aom_write_bit_buffer tg_params_wb;
   int saved_offset;
+  int mtu_size = cpi->oxcf.mtu;
+  int curr_tg_data_size = 0;
+  int hdr_size;
 #endif
 #if CONFIG_EXT_TILE
   const int have_tiles = tile_cols * tile_rows > 1;
@@ -3415,7 +3419,6 @@
 
     for (tile_row = 0; tile_row < tile_rows; tile_row++) {
       TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
-      unsigned int tile_size;
       const TOKENEXTRA *tok = tok_buffers[tile_row][tile_col];
       const TOKENEXTRA *tok_end = tok + cpi->tok_count[tile_row][tile_col];
       const int data_offset = have_tiles ? 4 : 0;
@@ -3500,7 +3503,8 @@
   dst = wb->bit_buffer;
   comp_hdr_size = write_compressed_header(cpi, dst + uncompressed_hdr_size);
   aom_wb_write_literal(&comp_hdr_len_wb, (int)(comp_hdr_size), 16);
-  total_size += uncompressed_hdr_size + comp_hdr_size;
+  hdr_size = uncompressed_hdr_size + comp_hdr_size;
+  total_size += hdr_size;
 #endif
 
   for (tile_row = 0; tile_row < tile_rows; tile_row++) {
@@ -3513,7 +3517,6 @@
       const int tile_idx = tile_row * tile_cols + tile_col;
       TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
       const int is_last_col = (tile_col == tile_cols - 1);
-      unsigned int tile_size;
 #if CONFIG_PVQ
       TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
 #endif
@@ -3525,21 +3528,53 @@
 #else
       // All tiles in a tile group have a length
       const int is_last_tile = 0;
-      if (tile_count >= tg_size) {
-        // Copy uncompressed header
-        memcpy(dst + total_size, dst, uncompressed_hdr_size * sizeof(uint8_t));
-        // Write the number of tiles in the group into the last uncompressed
-        // header
-        aom_wb_write_literal(&tg_params_wb, tile_idx - tile_count,
-                             n_log2_tiles);
-        aom_wb_write_literal(&tg_params_wb, tile_count - 1, n_log2_tiles);
-        tg_params_wb.bit_offset = saved_offset + 8 * total_size;
-        // Copy compressed header
-        memcpy(dst + total_size + uncompressed_hdr_size,
-               dst + uncompressed_hdr_size, comp_hdr_size * sizeof(uint8_t));
-        total_size += uncompressed_hdr_size;
-        total_size += comp_hdr_size;
-        tile_count = 0;
+
+      if ((!mtu_size && tile_count > tg_size) ||
+          (mtu_size && tile_count && curr_tg_data_size >= mtu_size)) {
+        // We've exceeded the packet size
+        if (tile_count > 1) {
+          /* The last tile exceeded the packet size. The tile group size
+             should therefore be tile_count-1.
+             Move the last tile and insert headers before it
+           */
+          int old_total_size = total_size - tile_size - 4;
+          memmove(dst + old_total_size + hdr_size, dst + old_total_size,
+                  (tile_size + 4) * sizeof(uint8_t));
+          // Copy uncompressed header
+          memmove(dst + old_total_size, dst,
+                  uncompressed_hdr_size * sizeof(uint8_t));
+          // Write the number of tiles in the group into the last uncompressed
+          // header before the one we've just inserted
+          aom_wb_write_literal(&tg_params_wb, tile_idx - tile_count,
+                               n_log2_tiles);
+          aom_wb_write_literal(&tg_params_wb, tile_count - 2, n_log2_tiles);
+          // Update the pointer to the last TG params
+          tg_params_wb.bit_offset = saved_offset + 8 * old_total_size;
+          // Copy compressed header
+          memmove(dst + old_total_size + uncompressed_hdr_size,
+                  dst + uncompressed_hdr_size, comp_hdr_size * sizeof(uint8_t));
+          total_size += hdr_size;
+          tile_count = 1;
+          curr_tg_data_size = hdr_size + tile_size + 4;
+
+        } else {
+          // We exceeded the packet size in just one tile
+          // Copy uncompressed header
+          memmove(dst + total_size, dst,
+                  uncompressed_hdr_size * sizeof(uint8_t));
+          // Write the number of tiles in the group into the last uncompressed
+          // header
+          aom_wb_write_literal(&tg_params_wb, tile_idx - tile_count,
+                               n_log2_tiles);
+          aom_wb_write_literal(&tg_params_wb, tile_count - 1, n_log2_tiles);
+          tg_params_wb.bit_offset = saved_offset + 8 * total_size;
+          // Copy compressed header
+          memmove(dst + total_size + uncompressed_hdr_size,
+                  dst + uncompressed_hdr_size, comp_hdr_size * sizeof(uint8_t));
+          total_size += hdr_size;
+          tile_count = 0;
+          curr_tg_data_size = hdr_size;
+        }
       }
       tile_count++;
 #endif
@@ -3575,6 +3610,9 @@
 
       assert(tile_size > 0);
 
+#if CONFIG_TILE_GROUPS
+      curr_tg_data_size += tile_size + 4;
+#endif
       buf->size = tile_size;
 
       if (!is_last_tile) {
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index 8a6ad18..e8ed909 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c
@@ -142,9 +142,9 @@
                       aom_prob upd_p) {
   (void)upd_p;
 #if CONFIG_TILE_GROUPS
-  // Just use the maximum number of tile groups to avoid passing in the actual
+  // Just use the default maximum number of tile groups to avoid passing in the actual
   // number
-  av1_cond_prob_diff_update(w, cur_p, ct, MAX_NUM_TG);
+  av1_cond_prob_diff_update(w, cur_p, ct, DEFAULT_MAX_NUM_TG);
 #else
   av1_cond_prob_diff_update(w, cur_p, ct, 1);
 #endif
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index ddb18b6..7503815 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4585,10 +4585,12 @@
     }
   }
 #if CONFIG_TILE_GROUPS
-  if (cm->error_resilient_mode)
-    cm->num_tg = MAX_NUM_TG;
-  else
-    cm->num_tg = 1;
+  if (cpi->oxcf.mtu == 0) {
+    cm->num_tg = cpi->oxcf.num_tile_groups;
+  } else {
+    // Use a default value for the purposes of weighting costs in probability updates
+    cm->num_tg = DEFAULT_MAX_NUM_TG;
+  }
 #endif
 
   // For 1 pass CBR, check if we are dropping this frame.
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index 1838b92..bed3987 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -193,6 +193,10 @@
   int qm_minlevel;
   int qm_maxlevel;
 #endif
+#if CONFIG_TILE_GROUPS
+  unsigned int num_tile_groups;
+  unsigned int mtu;
+#endif
 
   // Internal frame size scaling.
   RESIZE_TYPE resize_mode;