add new experiment loopfiltering_across_tiles_ext

Based on the latest discussion in the HW working group about how loop
filtering should be integrated with tiles, the following decisions have
been made:
1. Two separate flags should be added for
loop_filter_across_tiles_enabled: one for horizontal tile boundaries and
one for vertical tile boundaries.
2. The encoder and decoder should check only these two flags to determine
whether loop filtering (including deblocking, CDEF and loop restoration)
should cross tile boundaries (vertical and/or horizontal), regardless of
the horizontal dependent tile flag.

This change list implements support for two separate
loop_filter_across_tiles_enabled flags for vertical and horizontal tile
boundaries. The new experiment is disabled by default until it is
adopted.

Change-Id: I814377947517f5419c08b004a3b71b950d01eadd
diff --git a/aom/aomcx.h b/aom/aomcx.h
index f8c7168..c86c0ae 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -553,6 +553,21 @@
   */
   AV1E_SET_DISABLE_TEMPMV,
 
+  /*!\brief Codec control function to set loop_filter_across_tiles_v_enabled
+   * and loop_filter_across_tiles_h_enabled.
+   * In encoding and decoding, AV1 allows disabling loop filter across tile
+   * boundary. The parameter for this control describes the value of this flag,
+   * which has a valid range [0, 1]:
+   *            0 = disable loop filter across tile boundary
+   *            1 = enable loop filter across tile boundary
+   *
+   * By default, the value is 1, i.e. enable loop filter across tile boundary.
+   *
+   * Experiment: LOOPFILTERING_ACROSS_TILES_EXT
+   */
+  AV1E_SET_TILE_LOOPFILTER_V,
+  AV1E_SET_TILE_LOOPFILTER_H,
+
   /*!\brief Codec control function to set loop_filter_across_tiles_enabled.
    *
    * In encoding and decoding, AV1 allows disabling loop filter across tile
@@ -726,6 +741,10 @@
 AOM_CTRL_USE_TYPE(AV1E_SET_TILE_DEPENDENT_ROWS, int)
 #define AOM_CTRL_AV1E_SET_TILE_DEPENDENT_ROWS
 
+AOM_CTRL_USE_TYPE(AV1E_SET_TILE_LOOPFILTER_V, int)
+#define AOM_CTRL_AV1E_SET_TILE_LOOPFILTER_V
+AOM_CTRL_USE_TYPE(AV1E_SET_TILE_LOOPFILTER_H, int)
+#define AOM_CTRL_AV1E_SET_TILE_LOOPFILTER_H
 AOM_CTRL_USE_TYPE(AV1E_SET_TILE_LOOPFILTER, int)
 #define AOM_CTRL_AV1E_SET_TILE_LOOPFILTER
 
diff --git a/aomenc.c b/aomenc.c
index 6ecb055..f84f9cd 100644
--- a/aomenc.c
+++ b/aomenc.c
@@ -434,8 +434,17 @@
     ARG_DEF(NULL, "tile-dependent-rows", 1, "Enable dependent Tile rows");
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+static const arg_def_t tile_loopfilter_v =
+    ARG_DEF(NULL, "tile-loopfilter-v", 1,
+            "Enable loop filter across vertical tile boundary");
+static const arg_def_t tile_loopfilter_h =
+    ARG_DEF(NULL, "tile-loopfilter-h", 1,
+            "Enable loop filter across horizontal tile boundary");
+#else
 static const arg_def_t tile_loopfilter = ARG_DEF(
     NULL, "tile-loopfilter", 1, "Enable loop filter across tile boundary");
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 static const arg_def_t lossless =
     ARG_DEF(NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)");
@@ -579,7 +588,12 @@
                                        &tile_dependent_rows,
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+                                       &tile_loopfilter_v,
+                                       &tile_loopfilter_h,
+#else
                                        &tile_loopfilter,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
                                        &arnr_maxframes,
                                        &arnr_strength,
@@ -638,7 +652,12 @@
                                         AV1E_SET_TILE_DEPENDENT_ROWS,
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+                                        AV1E_SET_TILE_LOOPFILTER_V,
+                                        AV1E_SET_TILE_LOOPFILTER_H,
+#else
                                         AV1E_SET_TILE_LOOPFILTER,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
                                         AOME_SET_ARNR_MAXFRAMES,
                                         AOME_SET_ARNR_STRENGTH,
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 79910eb..9066965 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -43,7 +43,12 @@
   unsigned int dependent_horz_tiles;
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  unsigned int loop_filter_across_tiles_v_enabled;
+  unsigned int loop_filter_across_tiles_h_enabled;
+#else
   unsigned int loop_filter_across_tiles_enabled;
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
   unsigned int arnr_max_frames;
   unsigned int arnr_strength;
@@ -105,7 +110,12 @@
   0,  // Dependent Horizontal tiles
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
-  1,              // loop_filter_across_tiles_enabled
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  1,  // loop_filter_across_tiles_v_enabled
+  1,  // loop_filter_across_tiles_h_enabled
+#else
+  1,  // loop_filter_across_tiles_enabled
+#endif            // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif            // CONFIG_LOOPFILTERING_ACROSS_TILES
   7,              // arnr_max_frames
   5,              // arnr_strength
@@ -324,7 +334,12 @@
   RANGE_CHECK_HI(extra_cfg, dependent_horz_tiles, 1);
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  RANGE_CHECK_HI(extra_cfg, loop_filter_across_tiles_v_enabled, 1);
+  RANGE_CHECK_HI(extra_cfg, loop_filter_across_tiles_h_enabled, 1);
+#else
   RANGE_CHECK_HI(extra_cfg, loop_filter_across_tiles_enabled, 1);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
   RANGE_CHECK_HI(extra_cfg, sharpness, 7);
   RANGE_CHECK_HI(extra_cfg, arnr_max_frames, 15);
@@ -658,8 +673,15 @@
                               extra_cfg->dependent_horz_tiles;
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  oxcf->loop_filter_across_tiles_v_enabled =
+      extra_cfg->loop_filter_across_tiles_v_enabled;
+  oxcf->loop_filter_across_tiles_h_enabled =
+      extra_cfg->loop_filter_across_tiles_h_enabled;
+#else
   oxcf->loop_filter_across_tiles_enabled =
       extra_cfg->loop_filter_across_tiles_enabled;
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
   oxcf->error_resilient_mode = cfg->g_error_resilient;
   oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode;
@@ -808,6 +830,22 @@
 }
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+static aom_codec_err_t ctrl_set_tile_loopfilter_v(aom_codec_alg_priv_t *ctx,
+                                                  va_list args) {
+  struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.loop_filter_across_tiles_v_enabled =
+      CAST(AV1E_SET_TILE_LOOPFILTER_V, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+static aom_codec_err_t ctrl_set_tile_loopfilter_h(aom_codec_alg_priv_t *ctx,
+                                                  va_list args) {
+  struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.loop_filter_across_tiles_h_enabled =
+      CAST(AV1E_SET_TILE_LOOPFILTER_H, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+#else
 static aom_codec_err_t ctrl_set_tile_loopfilter(aom_codec_alg_priv_t *ctx,
                                                 va_list args) {
   struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -815,6 +853,7 @@
       CAST(AV1E_SET_TILE_LOOPFILTER, args);
   return update_extra_cfg(ctx, &extra_cfg);
 }
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 
 static aom_codec_err_t ctrl_set_arnr_max_frames(aom_codec_alg_priv_t *ctx,
@@ -1593,7 +1632,12 @@
   { AV1E_SET_TILE_DEPENDENT_ROWS, ctrl_set_tile_dependent_rows },
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  { AV1E_SET_TILE_LOOPFILTER_V, ctrl_set_tile_loopfilter_v },
+  { AV1E_SET_TILE_LOOPFILTER_H, ctrl_set_tile_loopfilter_h },
+#else
   { AV1E_SET_TILE_LOOPFILTER, ctrl_set_tile_loopfilter },
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
   { AOME_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames },
   { AOME_SET_ARNR_STRENGTH, ctrl_set_arnr_strength },
diff --git a/av1/common/av1_loopfilter.c b/av1/common/av1_loopfilter.c
index ef70ddb..3568413 100644
--- a/av1/common/av1_loopfilter.c
+++ b/av1/common/av1_loopfilter.c
@@ -1018,7 +1018,7 @@
     *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
 }
 
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 // This function update the bit masks for the entire 64x64 region represented
 // by mi_row, mi_col. In case one of the edge is a tile boundary, loop filtering
 // for that edge is disabled. This function only check the tile boundary info
@@ -1281,7 +1281,7 @@
     }
   }
 
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
   if (av1_disable_loopfilter_on_tile_boundary(cm)) {
     update_tile_boundary_filter_mask(cm, mi_row, mi_col, lfm);
   }
@@ -1595,7 +1595,7 @@
 
     // Disable filtering on the leftmost column or tile boundary
     unsigned int border_mask = ~(mi_col == 0 ? 1 : 0);
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
     MODE_INFO *const mi = cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
     if (av1_disable_loopfilter_on_tile_boundary(cm) &&
         ((mi->mbmi.boundary_info & TILE_LEFT_BOUNDARY) != 0)) {
@@ -1643,7 +1643,7 @@
                                       &lfl[r][0], &mask_4x4_int, NULL,
                                       &row_masks, NULL);
 
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
     // Disable filtering on the abovemost row or tile boundary
     const MODE_INFO *mi = cm->mi + (mi_row + idx_r) * cm->mi_stride + mi_col;
     if ((av1_disable_loopfilter_on_tile_boundary(cm) &&
@@ -2111,13 +2111,15 @@
     uint32_t level = curr_level;
     // prepare outer edge parameters. deblock the edge if it's an edge of a TU
     if (coord) {
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
       MODE_INFO *const mi_bound = cm->mi + mi_row * cm->mi_stride + mi_col;
-      if (!av1_disable_loopfilter_on_tile_boundary(cm) ||
-          ((VERT_EDGE == edge_dir) &&
-           (0 == (mi_bound->mbmi.boundary_info & TILE_LEFT_BOUNDARY))) ||
-          ((HORZ_EDGE == edge_dir) &&
-           (0 == (mi_bound->mbmi.boundary_info & TILE_ABOVE_BOUNDARY))))
+      // here, assuming boundary_info is set correctly based on the
+      // loop_filter_across_tiles_enabled flag, i.e, tile boundary should
+      // only be set to true when this flag is set to 0.
+      int left_boundary = (mi_bound->mbmi.boundary_info & TILE_LEFT_BOUNDARY);
+      int top_boundary = (mi_bound->mbmi.boundary_info & TILE_ABOVE_BOUNDARY);
+      if (((VERT_EDGE == edge_dir) && (0 == left_boundary)) ||
+          ((HORZ_EDGE == edge_dir) && (0 == top_boundary)))
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
       {
         const int32_t tu_edge =
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index 26c7b43..ebd97db 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -218,23 +218,40 @@
       nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
       nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
       int tile_top, tile_left, tile_bottom, tile_right;
-      int mi_idx = MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc;
-      MODE_INFO *const mi_tl = cm->mi + mi_idx;
+
+      int mi_row = MI_SIZE_64X64 * fbr;
+      int mi_col = MI_SIZE_64X64 * fbc;
+      int mi_idx_tl = mi_row * cm->mi_stride + mi_col;
+      int mi_idx_tr = mi_row * cm->mi_stride + (mi_col + MI_SIZE_64X64 - 1);
+      int mi_idx_bl = (mi_row + MI_SIZE_64X64 - 1) * cm->mi_stride + mi_col;
+      // for the current filter block, its top-left corner mi structure (mi_tl)
+      // is first accessed to check whether the top and left boundaries are
+      // tile boundaries. Then bottom-left and top-right mi structures are
+      // accessed to check whether the bottom and right boundaries
+      // (respectively) are tile boundaries.
+      //
+      // Note that we can't just check the bottom-right mi structure - eg. if
+      // we're at the right-hand edge of the frame but not the bottom, then
+      // the bottom-right mi is NULL but the bottom-left is not.
+      //
+      // We assume the boundary information is set correctly based on the
+      // loop_filter_across_tiles_enabled flag, i.e, if this flag is set to 1,
+      // then boundary_info should not be treated as tile boundaries. Also
+      // assume CDEF filter block size is 64x64.
+      MODE_INFO *const mi_tl = cm->mi + mi_idx_tl;
+      MODE_INFO *const mi_tr = cm->mi + mi_idx_tr;
+      MODE_INFO *const mi_bl = cm->mi + mi_idx_bl;
       BOUNDARY_TYPE boundary_tl = mi_tl->mbmi.boundary_info;
       tile_top = boundary_tl & TILE_ABOVE_BOUNDARY;
       tile_left = boundary_tl & TILE_LEFT_BOUNDARY;
 
-      if (fbr != nvfb - 1 &&
-          (&cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]))
-        tile_bottom = cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]
-                          .mbmi.boundary_info &
-                      TILE_BOTTOM_BOUNDARY;
+      if (fbr != nvfb - 1 && mi_bl)
+        tile_bottom = mi_bl->mbmi.boundary_info & TILE_BOTTOM_BOUNDARY;
       else
         tile_bottom = 1;
 
-      if (fbc != nhfb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1]))
-        tile_right = cm->mi[mi_idx + MI_SIZE_64X64 - 1].mbmi.boundary_info &
-                     TILE_RIGHT_BOUNDARY;
+      if (fbc != nhfb - 1 && mi_tr)
+        tile_right = mi_tr->mbmi.boundary_info & TILE_RIGHT_BOUNDARY;
       else
         tile_right = 1;
 
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index d153333..fea9de3 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -495,7 +495,12 @@
   int tile_group_start_col[MAX_TILE_ROWS][MAX_TILE_COLS];
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  int loop_filter_across_tiles_v_enabled;
+  int loop_filter_across_tiles_h_enabled;
+#else
   int loop_filter_across_tiles_enabled;
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 
   int byte_alignment;
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 7e81dca..7ac4c17 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -237,37 +237,25 @@
 // Helper function: Save one column of left/right context to the appropriate
 // column buffers, then extend the edge of the current tile into that column.
 //
-// Note: The code to deal with above/below boundaries may have filled out
-// the corners of the border with data from the tiles to our left or right,
-// which isn't allowed. To fix that up, we need to include the top and
-// bottom context regions in the area which we extend.
-// But note that we don't need to store the pixels we overwrite in the
-// corners of the context area - those have already been overwritten once,
-// so their original values are already in rlbs->tmp_save_{above,below}.
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+// Note: The height passed in should be the height of this processing unit,
+// but we actually save/restore an extra RESTORATION_BORDER pixels above and
+// below the stripe.
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 static void setup_boundary_column(const uint8_t *src8, int src_stride,
                                   uint8_t *dst8, int dst_stride, uint16_t *buf,
                                   int h, int use_highbd) {
   if (use_highbd) {
     const uint16_t *src16 = CONVERT_TO_SHORTPTR(src8);
     uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst8);
-    for (int i = -RESTORATION_BORDER; i < 0; i++)
-      dst16[i * dst_stride] = src16[i * src_stride];
-    for (int i = 0; i < h; i++) {
-      buf[i] = dst16[i * dst_stride];
+    for (int i = -RESTORATION_BORDER; i < h + RESTORATION_BORDER; i++) {
+      buf[i + RESTORATION_BORDER] = dst16[i * dst_stride];
       dst16[i * dst_stride] = src16[i * src_stride];
     }
-    for (int i = h; i < h + RESTORATION_BORDER; i++)
-      dst16[i * dst_stride] = src16[i * src_stride];
   } else {
-    for (int i = -RESTORATION_BORDER; i < 0; i++)
-      dst8[i * dst_stride] = src8[i * src_stride];
-    for (int i = 0; i < h; i++) {
-      buf[i] = dst8[i * dst_stride];
+    for (int i = -RESTORATION_BORDER; i < h + RESTORATION_BORDER; i++) {
+      buf[i + RESTORATION_BORDER] = dst8[i * dst_stride];
       dst8[i * dst_stride] = src8[i * src_stride];
     }
-    for (int i = h; i < h + RESTORATION_BORDER; i++)
-      dst8[i * dst_stride] = src8[i * src_stride];
   }
 }
 
@@ -276,9 +264,11 @@
                                     int use_highbd) {
   if (use_highbd) {
     uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst8);
-    for (int i = 0; i < h; i++) dst16[i * dst_stride] = buf[i];
+    for (int i = -RESTORATION_BORDER; i < h + RESTORATION_BORDER; i++)
+      dst16[i * dst_stride] = buf[i + RESTORATION_BORDER];
   } else {
-    for (int i = 0; i < h; i++) dst8[i * dst_stride] = (uint8_t)(buf[i]);
+    for (int i = -RESTORATION_BORDER; i < h + RESTORATION_BORDER; i++)
+      dst8[i * dst_stride] = buf[i + RESTORATION_BORDER];
   }
 }
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
@@ -320,15 +310,23 @@
 static void get_stripe_boundary_info(const RestorationTileLimits *limits,
                                      const AV1PixelRect *tile_rect, int ss_y,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+                                     int loop_filter_across_tiles_h_enabled,
+#else
                                      int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
                                      int *copy_above, int *copy_below) {
   *copy_above = 1;
   *copy_below = 1;
 
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  if (loop_filter_across_tiles_h_enabled) {
+#else
   if (loop_filter_across_tiles_enabled) {
-#endif
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
     const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
     const int rtile_offset = RESTORATION_TILE_OFFSET >> ss_y;
 
@@ -340,7 +338,7 @@
 
     if (first_stripe_in_tile) *copy_above = 0;
     if (last_stripe_in_tile) *copy_below = 0;
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
   }
 #endif
 }
@@ -362,7 +360,11 @@
     const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb,
     int rsb_row, int use_highbd, int h,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    const AV1PixelRect *tile_rect, int loop_filter_across_tiles_v_enabled,
+#else
     const AV1PixelRect *tile_rect, int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
     uint8_t *data8, int data_stride, RestorationLineBuffers *rlbs,
     int copy_above, int copy_below) {
@@ -425,7 +427,8 @@
   }
 
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
-  if (!loop_filter_across_tiles_enabled) {
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  if (!loop_filter_across_tiles_v_enabled) {
     // If loopfiltering across tiles is disabled, we need to check if we're at
     // the edge of the current tile column. If we are, we need to extend the
     // leftmost/rightmost column within the tile by 3 pixels, so that the output
@@ -450,16 +453,55 @@
                               rlbs->tmp_save_right[j], h, use_highbd);
     }
   }
+#else
+  if (!loop_filter_across_tiles_enabled) {
+    // If loopfiltering across tiles is disabled, we need to extend tile edges
+    // by 3 pixels, to ensure that we don't sample from the tiles to our left
+    // or right.
+    const int at_tile_left_border = (limits->h_start == tile_rect->left);
+    const int at_tile_right_border = (limits->h_end == tile_rect->right);
+
+    if (at_tile_left_border) {
+      uint8_t *dst8 = data8 + limits->h_start + limits->v_start * data_stride;
+      for (int j = -RESTORATION_BORDER; j < 0; j++)
+        setup_boundary_column(dst8, data_stride, dst8 + j, data_stride,
+                              rlbs->tmp_save_left[j + RESTORATION_BORDER], h,
+                              use_highbd);
+    }
+
+    if (at_tile_right_border) {
+      uint8_t *dst8 = data8 + limits->h_end + limits->v_start * data_stride;
+      for (int j = 0; j < RESTORATION_BORDER; j++)
+        setup_boundary_column(dst8 - 1, data_stride, dst8 + j, data_stride,
+                              rlbs->tmp_save_right[j], h, use_highbd);
+    }
+  }
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 }
 
 // This function restores the boundary lines modified by
 // setup_processing_stripe_boundary.
+//
+// Note: We need to be careful when handling the corners of the processing
+// unit, because (eg.) the top-left corner is considered to be part of
+// both the left and top borders. This means that, depending on the
+// loop_filter_across_tiles_enabled flag, the corner pixels might get
+// overwritten twice, once as part of the "top" border and once as part
+// of the "left" border (or similar for other corners).
+//
+// Everything works out fine as long as we make sure to reverse the order
+// when restoring, ie. we need to restore the left/right borders followed
+// by the top/bottom borders.
 static void restore_processing_stripe_boundary(
     const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs,
     int use_highbd, int h,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    const AV1PixelRect *tile_rect, int loop_filter_across_tiles_v_enabled,
+#else
     const AV1PixelRect *tile_rect, int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
     uint8_t *data8, int data_stride, int copy_above, int copy_below) {
   assert(CONFIG_HIGHBITDEPTH || !use_highbd);
@@ -470,6 +512,54 @@
 
   const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
 
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  if (!loop_filter_across_tiles_v_enabled) {
+    // Restore any pixels we overwrote at the left/right edge of this
+    // processing unit.
+    const int at_tile_left_border = (limits->h_start == tile_rect->left);
+    const int at_tile_right_border = (limits->h_end == tile_rect->right);
+
+    if (at_tile_left_border) {
+      uint8_t *dst8 = data8 + limits->h_start + limits->v_start * data_stride;
+      for (int j = -RESTORATION_BORDER; j < 0; j++)
+        restore_boundary_column(dst8 + j, data_stride,
+                                rlbs->tmp_save_left[j + RESTORATION_BORDER], h,
+                                use_highbd);
+    }
+
+    if (at_tile_right_border) {
+      uint8_t *dst8 = data8 + limits->h_end + limits->v_start * data_stride;
+      for (int j = 0; j < RESTORATION_BORDER; j++)
+        restore_boundary_column(dst8 + j, data_stride, rlbs->tmp_save_right[j],
+                                h, use_highbd);
+    }
+  }
+#else
+  if (!loop_filter_across_tiles_enabled) {
+    // Restore any pixels we overwrote at the left/right edge of this
+    // processing unit.
+    const int at_tile_left_border = (limits->h_start == tile_rect->left);
+    const int at_tile_right_border = (limits->h_end == tile_rect->right);
+
+    if (at_tile_left_border) {
+      uint8_t *dst8 = data8 + limits->h_start + limits->v_start * data_stride;
+      for (int j = -RESTORATION_BORDER; j < 0; j++)
+        restore_boundary_column(dst8 + j, data_stride,
+                                rlbs->tmp_save_left[j + RESTORATION_BORDER], h,
+                                use_highbd);
+    }
+
+    if (at_tile_right_border) {
+      uint8_t *dst8 = data8 + limits->h_end + limits->v_start * data_stride;
+      for (int j = 0; j < RESTORATION_BORDER; j++)
+        restore_boundary_column(dst8 + j, data_stride, rlbs->tmp_save_right[j],
+                                h, use_highbd);
+    }
+  }
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+
   if (copy_above) {
     uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
     for (int i = -RESTORATION_BORDER; i < 0; ++i) {
@@ -490,34 +580,6 @@
       memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size);
     }
   }
-
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
-  if (!loop_filter_across_tiles_enabled) {
-    // Restore any pixels we overwrote at the left/right edge of this
-    // processing unit
-    // Note: We don't need to restore the corner pixels, even if we overwrote
-    // them in the equivalent place in setup_processing_stripe_boundary:
-    // Because !loop_filter_across_tiles_enabled => copy_above = copy_below = 1,
-    // the corner pixels will already have been restored before we get here.
-    const int at_tile_left_border = (limits->h_start == tile_rect->left);
-    const int at_tile_right_border = (limits->h_end == tile_rect->right);
-
-    if (at_tile_left_border) {
-      uint8_t *dst8 = data8 + limits->h_start + limits->v_start * data_stride;
-      for (int j = -RESTORATION_BORDER; j < 0; j++)
-        restore_boundary_column(dst8 + j, data_stride,
-                                rlbs->tmp_save_left[j + RESTORATION_BORDER], h,
-                                use_highbd);
-    }
-
-    if (at_tile_right_border) {
-      uint8_t *dst8 = data8 + limits->h_end + limits->v_start * data_stride;
-      for (int j = 0; j < RESTORATION_BORDER; j++)
-        restore_boundary_column(dst8 + j, data_stride, rlbs->tmp_save_right[j],
-                                h, use_highbd);
-    }
-  }
-#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 }
 #endif
 
@@ -931,11 +993,56 @@
 };
 
 const int32_t one_by_x[MAX_NELEM] = {
-  4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
-  293,  273,  256,  241,  228, 216, 205, 195, 186, 178, 171, 164,
+  4096,
+  2048,
+  1365,
+  1024,
+  819,
+  683,
+  585,
+  512,
+  455,
+  410,
+  372,
+  341,
+  315,
+  293,
+  273,
+  256,
+  241,
+  228,
+  216,
+  205,
+  195,
+  186,
+  178,
+  171,
+  164,
 #if MAX_RADIUS > 2
-  158,  152,  146,  141,  137, 132, 128, 124, 120, 117, 114, 111, 108,
-  105,  102,  100,  98,   95,  93,  91,  89,  87,  85,  84
+  158,
+  152,
+  146,
+  141,
+  137,
+  132,
+  128,
+  124,
+  120,
+  117,
+  114,
+  111,
+  108,
+  105,
+  102,
+  100,
+  98,
+  95,
+  93,
+  91,
+  89,
+  87,
+  85,
+  84
 #endif  // MAX_RADIUS > 2
 };
 
@@ -1208,9 +1315,11 @@
 #endif
 
 static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = {
-  wiener_filter_stripe, sgrproj_filter_stripe,
+  wiener_filter_stripe,
+  sgrproj_filter_stripe,
 #if CONFIG_HIGHBITDEPTH
-  wiener_filter_stripe_highbd, sgrproj_filter_stripe_highbd
+  wiener_filter_stripe_highbd,
+  sgrproj_filter_stripe_highbd
 #endif  // CONFIG_HIGHBITDEPTH
 };
 
@@ -1221,7 +1330,12 @@
     const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
     const AV1PixelRect *tile_rect, int tile_stripe0,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    int loop_filter_across_tiles_v_enabled,
+    int loop_filter_across_tiles_h_enabled,
+#else
     int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 #endif  // CONFIG_STRIPED_LOOP_RESTORATION
     int ss_x, int ss_y, int highbd, int bit_depth, uint8_t *data8, int stride,
@@ -1254,7 +1368,11 @@
 
     get_stripe_boundary_info(&remaining_stripes, tile_rect, ss_y,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+                             loop_filter_across_tiles_h_enabled,
+#else
                              loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
                              &copy_above, &copy_below);
 
@@ -1280,7 +1398,11 @@
     setup_processing_stripe_boundary(
         &remaining_stripes, rsb, rsb_row, highbd, h,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+        tile_rect, loop_filter_across_tiles_v_enabled,
+#else
         tile_rect, loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
         data8, stride, rlbs, copy_above, copy_below);
 
@@ -1289,8 +1411,13 @@
 
     restore_processing_stripe_boundary(&remaining_stripes, rlbs, highbd, h,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+                                       tile_rect,
+                                       loop_filter_across_tiles_v_enabled,
+#else
                                        tile_rect,
                                        loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
                                        data8, stride, copy_above, copy_below);
 
@@ -1347,7 +1474,12 @@
 #if CONFIG_STRIPED_LOOP_RESTORATION
       &rsi->boundaries, ctxt->rlbs, tile_rect, ctxt->tile_stripe0,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+      ctxt->cm->loop_filter_across_tiles_v_enabled,
+      ctxt->cm->loop_filter_across_tiles_h_enabled,
+#else
       ctxt->cm->loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 #endif  // CONFIG_STRIPED_LOOP_RESTORATION
       ctxt->ss_x, ctxt->ss_y, ctxt->highbd, ctxt->bit_depth, ctxt->data8,
@@ -1783,6 +1915,16 @@
 
     int use_deblock_above, use_deblock_below;
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    if (!cm->loop_filter_across_tiles_h_enabled) {
+      // In this case, we should use CDEF pixels for the above context
+      // of the topmost stripe in each region, and for the below context
+      // of the bottommost stripe in each tile. Dependent horizontal tile
+      // flag is ignored for this extension
+      use_deblock_above = (tile_stripe > 0);
+      use_deblock_below = (y1 < tile_rect.bottom);
+    } else {
+#else
     if (!cm->loop_filter_across_tiles_enabled) {
 // In this case, we should use CDEF pixels for the above context
 // of the topmost stripe in each region, and for the below context
@@ -1801,13 +1943,14 @@
 
       use_deblock_below = (y1 < tile_rect.bottom);
     } else {
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
       // In this case, we should only use CDEF pixels at the top
       // and bottom of the frame as a whole; internal tile boundaries
       // can use deblocked pixels from adjacent tiles for context.
       use_deblock_above = (frame_stripe > 0);
       use_deblock_below = (y1 < plane_height);
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
     }
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index da73e73..d2ade38 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -213,20 +213,26 @@
 #define RESTORATION_LINEBUFFER_WIDTH \
   (RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_EXTRA_HORZ)
 
+// Similarly, the column buffers (used when we're at a vertical tile edge
+// that we can't filter across) need space for one processing unit's worth
+// of pixels, plus the top/bottom border width
+#define RESTORATION_COLBUFFER_HEIGHT \
+  (RESTORATION_PROC_UNIT_SIZE + 2 * RESTORATION_BORDER)
+
 typedef struct {
   // Temporary buffers to save/restore 3 lines above/below the restoration
   // stripe.
   uint16_t tmp_save_above[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH];
   uint16_t tmp_save_below[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH];
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
   // Column buffers, for storing 3 pixels at the left/right of each tile
   // when loopfiltering across tiles is disabled.
   //
   // Note: These arrays only need to store the pixels immediately left/right
   // of each processing unit; the corner pixels (top-left, etc.) are always
   // stored into the above/below arrays.
-  uint16_t tmp_save_left[RESTORATION_BORDER][RESTORATION_PROC_UNIT_SIZE];
-  uint16_t tmp_save_right[RESTORATION_BORDER][RESTORATION_PROC_UNIT_SIZE];
+  uint16_t tmp_save_left[RESTORATION_BORDER][RESTORATION_COLBUFFER_HEIGHT];
+  uint16_t tmp_save_right[RESTORATION_BORDER][RESTORATION_COLBUFFER_HEIGHT];
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 } RestorationLineBuffers;
 
@@ -314,7 +320,12 @@
     const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
     const AV1PixelRect *tile_rect, int tile_stripe0,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    int loop_filter_across_tiles_v_enabled,
+    int loop_filter_across_tiles_h_enabled,
+#else
     int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 #endif  // CONFIG_STRIPED_LOOP_RESTORATION
     int ss_x, int ss_y, int highbd, int bit_depth, uint8_t *data8, int stride,
diff --git a/av1/common/tile_common.c b/av1/common/tile_common.c
index 6b5f367..3ddcc86 100644
--- a/av1/common/tile_common.c
+++ b/av1/common/tile_common.c
@@ -279,7 +279,9 @@
   return r;
 }
 
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+// This function should only be called when loop filtering across tile
+// boundaries is disabled (in at least one direction, with the EXT experiment).
 void av1_setup_across_tile_boundary_info(const AV1_COMMON *const cm,
                                          const TileInfo *const tile_info) {
   if (cm->tile_cols * cm->tile_rows > 1) {
@@ -292,9 +294,14 @@
     const int col_diff = tile_info->mi_col_end - tile_info->mi_col_start;
     int row, col;
 
-#if CONFIG_DEPENDENT_HORZTILES
+// When CONFIG_LOOPFILTERING_ACROSS_TILES_EXT is enabled, whether the tile
+// is a dependent horizontal tile or not is ignored. The tile boundary is
+// always initialized based on the actual tile boundary.
+#if CONFIG_DEPENDENT_HORZTILES && !CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
     if (!cm->dependent_horz_tiles || tile_info->tg_horz_boundary)
-#endif  // CONFIG_DEPENDENT_HORZTILES
+#elif CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    if (cm->loop_filter_across_tiles_h_enabled == 0)
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
     {
       mi = mi_start;
       for (col = 0; col < col_diff; ++col) {
@@ -303,32 +310,50 @@
       }
     }
 
-    mi = mi_start;
-    for (row = 0; row < row_diff; ++row) {
-      mi->mbmi.boundary_info |= TILE_LEFT_BOUNDARY;
-      mi += cm->mi_stride;
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    if (cm->loop_filter_across_tiles_v_enabled == 0)
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    {
+      mi = mi_start;
+      for (row = 0; row < row_diff; ++row) {
+        mi->mbmi.boundary_info |= TILE_LEFT_BOUNDARY;
+        mi += cm->mi_stride;
+      }
     }
 
-    mi = mi_start + (row_diff - 1) * cm->mi_stride;
-
-    // explicit bounds checking
-    assert(mi + col_diff <= cm->mip + cm->mi_alloc_size);
-
-    for (col = 0; col < col_diff; ++col) {
-      mi->mbmi.boundary_info |= TILE_BOTTOM_BOUNDARY;
-      mi += 1;
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    if (cm->loop_filter_across_tiles_h_enabled == 0)
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    {
+      mi = mi_start + (row_diff - 1) * cm->mi_stride;
+      // explicit bounds checking
+      assert(mi + col_diff <= cm->mip + cm->mi_alloc_size);
+      for (col = 0; col < col_diff; ++col) {
+        mi->mbmi.boundary_info |= TILE_BOTTOM_BOUNDARY;
+        mi += 1;
+      }
     }
 
-    mi = mi_start + col_diff - 1;
-    for (row = 0; row < row_diff; ++row) {
-      mi->mbmi.boundary_info |= TILE_RIGHT_BOUNDARY;
-      mi += cm->mi_stride;
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    if (cm->loop_filter_across_tiles_v_enabled == 0)
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    {
+      mi = mi_start + col_diff - 1;
+      for (row = 0; row < row_diff; ++row) {
+        mi->mbmi.boundary_info |= TILE_RIGHT_BOUNDARY;
+        mi += cm->mi_stride;
+      }
     }
-  }
+  }  // end of cm->tile_cols * cm->tile_rows > 1
 }
 
 int av1_disable_loopfilter_on_tile_boundary(const struct AV1Common *cm) {
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  return ((!cm->loop_filter_across_tiles_v_enabled ||
+           !cm->loop_filter_across_tiles_h_enabled) &&
+#else
   return (!cm->loop_filter_across_tiles_enabled &&
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
           (cm->tile_cols * cm->tile_rows > 1));
 }
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
diff --git a/av1/common/tile_common.h b/av1/common/tile_common.h
index d16ba9f..c7f1565 100644
--- a/av1/common/tile_common.h
+++ b/av1/common/tile_common.h
@@ -54,7 +54,7 @@
 AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info,
                                const struct AV1Common *cm, int is_uv);
 
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 void av1_setup_across_tile_boundary_info(const struct AV1Common *const cm,
                                          const TileInfo *const tile_info);
 int av1_disable_loopfilter_on_tile_boundary(const struct AV1Common *cm);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index d09abd3..9106744 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1592,10 +1592,23 @@
     cm->dependent_horz_tiles = 0;
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    if (cm->tile_cols > 1) {
+      cm->loop_filter_across_tiles_v_enabled = aom_rb_read_bit(rb);
+    } else {
+      cm->loop_filter_across_tiles_v_enabled = 1;
+    }
+    if (cm->tile_rows > 1) {
+      cm->loop_filter_across_tiles_h_enabled = aom_rb_read_bit(rb);
+    } else {
+      cm->loop_filter_across_tiles_h_enabled = 1;
+    }
+#else
     if (cm->tile_cols * cm->tile_rows > 1)
       cm->loop_filter_across_tiles_enabled = aom_rb_read_bit(rb);
     else
       cm->loop_filter_across_tiles_enabled = 1;
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 
     if (cm->tile_cols * cm->tile_rows > 1) {
@@ -1648,10 +1661,23 @@
       cm->dependent_horz_tiles = 0;
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+    if (cm->tile_cols > 1) {
+      cm->loop_filter_across_tiles_v_enabled = aom_rb_read_bit(rb);
+    } else {
+      cm->loop_filter_across_tiles_v_enabled = 1;
+    }
+    if (cm->tile_rows > 1) {
+      cm->loop_filter_across_tiles_h_enabled = aom_rb_read_bit(rb);
+    } else {
+      cm->loop_filter_across_tiles_h_enabled = 1;
+    }
+#else
     if (cm->tile_cols * cm->tile_rows > 1)
       cm->loop_filter_across_tiles_enabled = aom_rb_read_bit(rb);
     else
       cm->loop_filter_across_tiles_enabled = 1;
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 
     // tile size magnitude
@@ -1945,14 +1971,18 @@
   }
 }
 
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 static void dec_setup_across_tile_boundary_info(
     const AV1_COMMON *const cm, const TileInfo *const tile_info) {
   if (tile_info->mi_row_start >= tile_info->mi_row_end ||
       tile_info->mi_col_start >= tile_info->mi_col_end)
     return;
-
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  if (!cm->loop_filter_across_tiles_v_enabled ||
+      !cm->loop_filter_across_tiles_h_enabled) {
+#else
   if (!cm->loop_filter_across_tiles_enabled) {
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
     av1_setup_across_tile_boundary_info(cm, tile_info);
   }
 }
@@ -2125,7 +2155,7 @@
       av1_reset_loop_restoration(&td->xd);
 #endif  // CONFIG_LOOP_RESTORATION
 
-#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES || CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
       dec_setup_across_tile_boundary_info(cm, &tile_info);
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 
@@ -3236,7 +3266,7 @@
 // change every frame (particularly when dependent-horztiles is also
 // enabled); when it is disabled, the only information stored is the frame
 // boundaries, which only depend on the frame size.
-#if !CONFIG_LOOPFILTERING_ACROSS_TILES
+#if !CONFIG_LOOPFILTERING_ACROSS_TILES && !CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
   if (cm->width != cm->last_width || cm->height != cm->last_height)
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
   {
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 9f596c1..ed809ed 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2838,8 +2838,17 @@
 #endif  // CONFIG_EXT_TILE
 
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  if (cm->tile_cols > 1) {
+    aom_wb_write_bit(wb, cm->loop_filter_across_tiles_v_enabled);
+  }
+  if (cm->tile_rows > 1) {
+    aom_wb_write_bit(wb, cm->loop_filter_across_tiles_h_enabled);
+  }
+#else
   if (cm->tile_cols * cm->tile_rows > 1)
     aom_wb_write_bit(wb, cm->loop_filter_across_tiles_enabled);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 }
 
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 2ac3b37..1253882 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -3389,9 +3389,15 @@
 #endif
 
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  if (!cm->loop_filter_across_tiles_v_enabled ||
+      !cm->loop_filter_across_tiles_h_enabled)
+    av1_setup_across_tile_boundary_info(cm, tile_info);
+#else
   if (!cm->loop_filter_across_tiles_enabled)
     av1_setup_across_tile_boundary_info(cm, tile_info);
-#endif
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 
   av1_crc_calculator_init(&td->mb.tx_rd_record.crc_calculator, 24, 0x5D6DCB);
 
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index f7bd121..9b89e2b 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -1016,8 +1016,15 @@
 #endif
 
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  cm->loop_filter_across_tiles_v_enabled =
+      cpi->oxcf.loop_filter_across_tiles_v_enabled;
+  cm->loop_filter_across_tiles_h_enabled =
+      cpi->oxcf.loop_filter_across_tiles_h_enabled;
+#else
   cm->loop_filter_across_tiles_enabled =
       cpi->oxcf.loop_filter_across_tiles_enabled;
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 }
 
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index f84485e..1761b9a 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -265,7 +265,12 @@
   int dependent_horz_tiles;
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  int loop_filter_across_tiles_v_enabled;
+  int loop_filter_across_tiles_h_enabled;
+#else
   int loop_filter_across_tiles_enabled;
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 
   int max_threads;
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 37b0f1a..2a643c3 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -184,9 +184,14 @@
 #if CONFIG_STRIPED_LOOP_RESTORATION
       &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+      cm->loop_filter_across_tiles_v_enabled,
+      cm->loop_filter_across_tiles_h_enabled,
+#else
       cm->loop_filter_across_tiles_enabled,
-#endif
-#endif
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
       is_uv && cm->subsampling_x, is_uv && cm->subsampling_y, highbd, bit_depth,
       fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane],
       rsc->dst->strides[is_uv], cm->rst_tmpbuf);
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 5e9499e..a9469cd 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -156,6 +156,7 @@
 set(CONFIG_JNT_COMP 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_KF_CTX 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_LOOPFILTERING_ACROSS_TILES 1 CACHE NUMBER "AV1 experiment flag.")
+set(CONFIG_LOOPFILTERING_ACROSS_TILES_EXT 0 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_LOOPFILTER_LEVEL 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_LOOP_RESTORATION 1 CACHE NUMBER "AV1 experiment flag.")
 set(CONFIG_LPF_SB 0 CACHE NUMBER "AV1 experiment flag.")
diff --git a/configure b/configure
index 59d3d4a..5aa43d1 100755
--- a/configure
+++ b/configure
@@ -285,6 +285,7 @@
     parallel_deblocking
     deblock_13tap
     loopfiltering_across_tiles
+    loopfiltering_across_tiles_ext
     tempmv_signaling
     rd_debug
     reference_buffer
@@ -550,6 +551,7 @@
     enabled loopfilter_level && enable_feature ext_delta_q
     enabled striped_loop_restoration && enable_feature loop_restoration
     enabled adapt_scan && enable_feature lv_map
+    enabled loopfiltering_across_tiles_ext && enable_feature loopfiltering_across_tiles
 
     soft_enable daala_tx_dst32
 
diff --git a/test/av1_ext_tile_test.cc b/test/av1_ext_tile_test.cc
index 034b071..dcf6bc6 100644
--- a/test/av1_ext_tile_test.cc
+++ b/test/av1_ext_tile_test.cc
@@ -91,8 +91,13 @@
       encoder->Control(AV1E_SET_SUPERBLOCK_SIZE, AOM_SUPERBLOCK_SIZE_64X64);
 #endif
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER_V, 0);
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER_H, 0);
+#else
       encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
 #endif
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
     }
 
     if (video->frame() == 1) {
diff --git a/test/ethread_test.cc b/test/ethread_test.cc
index 2ae3c0d..63a5e12 100644
--- a/test/ethread_test.cc
+++ b/test/ethread_test.cc
@@ -72,7 +72,12 @@
     if (!encoder_initialized_) {
       SetTileSize(encoder);
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER_V, 0);
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER_H, 0);
+#else
       encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
       encoder->Control(AOME_SET_CPUUSED, set_cpu_used_);
       if (encoding_mode_ != ::libaom_test::kRealTime) {
diff --git a/test/superframe_test.cc b/test/superframe_test.cc
index 33dabd2..eec1a16 100644
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@@ -51,7 +51,12 @@
       encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
       encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER_V, 0);
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER_H, 0);
+#else
       encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
     }
   }
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc
index 832227f..def4aae 100644
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -64,7 +64,12 @@
       encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
       encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
 #if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER_V, 0);
+      encoder->Control(AV1E_SET_TILE_LOOPFILTER_H, 0);
+#else
       encoder->Control(AV1E_SET_TILE_LOOPFILTER, 0);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
 #endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
       SetCpuUsed(encoder);
     }