striped-loop-restoration: Respect tile experiments

As of patch https://aomedia-review.googlesource.com/c/aom/+/28821 ,
loop-restoration units cannot cross tile borders. But the context
around each processing unit was still allowed to cross tile borders.
This is fine in the usual case - but, when loop filtering across tiles
is switched off, we're supposed to be able to decode each tile completely
independently (each tile column, if dependent-horztiles is on).

Roughly, the change we need to make is:
When loop filtering across tiles is switched off, we treat each tile
as if it were a full frame, and extend the CDEF output for that tile
to form a 3-pixel border around the tile. We only use deblocked
above/below pixels for processing unit boundaries which lie inside
a tile.

In terms of the code, this is implemented in two parts. This only
applies when loop_filter_across_tiles_enabled is false; otherwise,
we keep the old behaviour.

* For processing units at the top edge of a tile, fill the above context
  with copies of the topmost line of CDEF output *from the same tile*,
  rather than using deblocked pixels from the tile above.
  The below context of processing units at the bottom edge of a tile
  is treated analogously.

* When setting up the boundary for a processing stripe at the left edge
  of a tile, fill the stripe's left boundary with copies of the
  leftmost column of CDEF output from the same tile. Again, processing
  stripes at the right edge of a tile are treated analogously.

  Similarly to the above/below boundaries, we store the overwritten
  pixels into a pair of left/right context buffers, and restore them
  to their original values once we've dealt with that processing stripe.

Change-Id: I53a0932793c1c56dc037683c6a4353a3f5dc4539
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 938dd96..1532c15 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -229,6 +229,66 @@
 #if CONFIG_STRIPED_LOOP_RESTORATION
 #define REAL_PTR(hbd, d) ((hbd) ? (uint8_t *)CONVERT_TO_SHORTPTR(d) : (d))
 
+// Helper function: Save one column of left/right context to the appropriate
+// column buffers, then extend the edge of the current tile into that column.
+//
+// Note: The code to deal with above/below boundaries may have filled out
+// the corners of the border with data from the tiles to our left or right,
+// which isn't allowed. To fix that up, we need to include the top and
+// bottom context regions in the area which we extend.
+// But note that we don't need to store the pixels we overwrite in the
+// corners of the context area - those have already been overwritten once,
+// so their original values are already in rlbs->tmp_save_{above,below}.
+//
+// src8 and dst8 point at row 0 of the stripe in the source (tile edge) column
+// and in the border column being filled; both use the frame buffer's pointer
+// convention (CONVERT_TO_BYTEPTR-style when use_highbd is set). save_buf is
+// the column buffer (one row of rlbs->tmp_save_left or rlbs->tmp_save_right)
+// that receives the overwritten pixels of the non-corner rows.
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+static void setup_boundary_column(const uint8_t *src8, int src_stride,
+                                  uint8_t *dst8, int dst_stride, int full_h,
+                                  int this_h, uint16_t *save_buf,
+                                  int use_highbd) {
+  const int central_h = AOMMIN(full_h, this_h + RESTORATION_BORDER);
+  if (use_highbd) {
+    const uint16_t *src16 = CONVERT_TO_SHORTPTR(src8);
+    uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst8);
+    for (int i = -RESTORATION_BORDER; i < 0; i++)
+      dst16[i * dst_stride] = src16[i * src_stride];
+    for (int i = 0; i < central_h; i++) {
+      save_buf[i] = dst16[i * dst_stride];
+      dst16[i * dst_stride] = src16[i * src_stride];
+    }
+    for (int i = full_h; i < this_h + RESTORATION_BORDER; i++)
+      dst16[i * dst_stride] = src16[i * src_stride];
+  } else {
+    for (int i = -RESTORATION_BORDER; i < 0; i++)
+      dst8[i * dst_stride] = src8[i * src_stride];
+    for (int i = 0; i < central_h; i++) {
+      save_buf[i] = dst8[i * dst_stride];
+      dst8[i * dst_stride] = src8[i * src_stride];
+    }
+    for (int i = full_h; i < this_h + RESTORATION_BORDER; i++)
+      dst8[i * dst_stride] = src8[i * src_stride];
+  }
+}
+
+// Helper function: Undo setup_boundary_column() for the first h rows of one
+// border column, writing the pixels saved in save_buf back to dst8. The write
+// goes through a uint16_t pointer in high-bitdepth mode, mirroring the way
+// setup_boundary_column() read the pixels in the first place.
+static void restore_boundary_column(const uint16_t *save_buf, uint8_t *dst8,
+                                    int dst_stride, int h, int use_highbd) {
+  if (use_highbd) {
+    uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst8);
+    for (int i = 0; i < h; i++) dst16[i * dst_stride] = save_buf[i];
+  } else {
+    for (int i = 0; i < h; i++) dst8[i * dst_stride] = (uint8_t)save_buf[i];
+  }
+}
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+
 // With striped loop restoration, the filtering for each 64-pixel stripe gets
 // most of its input from the output of CDEF (stored in data8), but pixels just
 // above and below the stripe come straight from the deblocker. These have been
@@ -249,6 +309,9 @@
 static int setup_processing_stripe_boundary(
     const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb,
     const AV1PixelRect *tile_rect, int tile_stripe0, int ss_y, int use_highbd,
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+    int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
     uint8_t *data8, int data_stride, RestorationLineBuffers *rlbs) {
   assert(CONFIG_HIGHBITDEPTH || !use_highbd);
 
@@ -262,36 +325,33 @@
   const int line_size = line_width << use_highbd;
 
   const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
-  const int stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
+  const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
   const int rtile_offset = RESTORATION_TILE_OFFSET >> ss_y;
 
   // Note that we don't need to worry about rounding here: this will be an
   // exact multiple except when limits->v_start == tile_y0 (at the top of the
   // tile).
   const int tile_stripe =
-      (limits->v_start - tile_rect->top + rtile_offset) / stripe_height;
+      (limits->v_start - tile_rect->top + rtile_offset) / full_stripe_height;
   const int frame_stripe = tile_stripe0 + tile_stripe;
 
-  // Replace RESTORATION_BORDER pixels above the top of the stripe, unless this
-  // is the top of the frame. We expand RESTORATION_CTX_VERT=2 lines from
-  // rsb->stripe_boundary_above to fill RESTORATION_BORDER=3 lines of above
-  // pixels. This is done by duplicating the topmost of the 2 lines (see the
-  // AOMMAX call when calculating src_row, which gets the values 0, 0, 1 for i
-  // = -3, -2, -1).
-  if (frame_stripe > 0) {
-    const int above_buf_y = RESTORATION_CTX_VERT * (frame_stripe - 1);
-    uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
+  // Replace RESTORATION_BORDER pixels above the top of the stripe
+  // We expand RESTORATION_CTX_VERT=2 lines from rsb->stripe_boundary_above
+  // to fill RESTORATION_BORDER=3 lines of above pixels. This is done by
+  // duplicating the topmost of the 2 lines (see the AOMMAX call when
+  // calculating src_row, which gets the values 0, 0, 1 for i = -3, -2, -1).
+  const int above_buf_y = RESTORATION_CTX_VERT * frame_stripe;
+  uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
 
-    for (int i = -RESTORATION_BORDER; i < 0; ++i) {
-      const int buf_row = above_buf_y + AOMMAX(i + RESTORATION_CTX_VERT, 0);
-      const int buf_off = buf_x0_off + buf_row * buf_stride;
-      const uint8_t *buf = rsb->stripe_boundary_above + (buf_off << use_highbd);
-      uint8_t *dst8 = data8_tl + i * data_stride;
-      // Save old pixels, then replace with data from stripe_boundary_above
-      memcpy(rlbs->tmp_save_above[i + RESTORATION_BORDER],
-             REAL_PTR(use_highbd, dst8), line_size);
-      memcpy(REAL_PTR(use_highbd, dst8), buf, line_size);
-    }
+  for (int i = -RESTORATION_BORDER; i < 0; ++i) {
+    const int buf_row = above_buf_y + AOMMAX(i + RESTORATION_CTX_VERT, 0);
+    const int buf_off = buf_x0_off + buf_row * buf_stride;
+    const uint8_t *buf = rsb->stripe_boundary_above + (buf_off << use_highbd);
+    uint8_t *dst8 = data8_tl + i * data_stride;
+    // Save old pixels, then replace with data from stripe_boundary_above
+    memcpy(rlbs->tmp_save_above[i + RESTORATION_BORDER],
+           REAL_PTR(use_highbd, dst8), line_size);
+    memcpy(REAL_PTR(use_highbd, dst8), buf, line_size);
   }
 
   // Replace up to RESTORATION_BORDER pixels below the bottom of the
@@ -300,7 +360,7 @@
   //
   // We might not write that many rows if the stripe isn't of full height
   // (which might happen at the bottom of a restoration unit).
-  const int stripe_bottom = limits->v_start + stripe_height;
+  const int stripe_bottom = limits->v_start + full_stripe_height;
   const int below_buf_y = RESTORATION_CTX_VERT * frame_stripe;
   uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride;
 
@@ -317,6 +377,35 @@
     memcpy(REAL_PTR(use_highbd, dst8), src, line_size);
   }
 
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+  if (!loop_filter_across_tiles_enabled) {
+    // If loopfiltering across tiles is disabled, we need to check if we're at
+    // the edge of the current tile column. If we are, we need to extend the
+    // leftmost/rightmost column within the tile by 3 pixels, so that the output
+    // doesn't depend on pixels from the next column over.
+    const int at_tile_left_border = (limits->h_start == tile_rect->left);
+    const int at_tile_right_border = (limits->h_end == tile_rect->right);
+    const int this_stripe_height =
+        AOMMIN(full_stripe_height, limits->v_end - limits->v_start);
+    if (at_tile_left_border) {
+      uint8_t *dst8 = data8 + limits->h_start + limits->v_start * data_stride;
+      for (int j = -RESTORATION_BORDER; j < 0; j++)
+        setup_boundary_column(dst8, data_stride, dst8 + j, data_stride,
+                              full_stripe_height, this_stripe_height,
+                              rlbs->tmp_save_left[j + RESTORATION_BORDER],
+                              use_highbd);
+    }
+
+    if (at_tile_right_border) {
+      uint8_t *dst8 = data8 + limits->h_end + limits->v_start * data_stride;
+      for (int j = 0; j < RESTORATION_BORDER; j++)
+        setup_boundary_column(dst8 - 1, data_stride, dst8 + j, data_stride,
+                              full_stripe_height, this_stripe_height,
+                              rlbs->tmp_save_right[j], use_highbd);
+    }
+  }
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+
   // Finally, return the actual height of this stripe.
   return AOMMIN(limits->v_end, stripe_bottom) - limits->v_start;
 }
@@ -325,7 +414,10 @@
 // setup_processing_stripe_boundary.
 static void restore_processing_stripe_boundary(
     const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs,
-    const AV1PixelRect *tile_rect, int tile_stripe0, int ss_y, int use_highbd,
+    int ss_y, int use_highbd,
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+    const AV1PixelRect *tile_rect, int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
     uint8_t *data8, int data_stride) {
   assert(CONFIG_HIGHBITDEPTH || !use_highbd);
 
@@ -334,23 +426,16 @@
   const int line_size = line_width << use_highbd;
 
   const int data_x0 = limits->h_start - RESTORATION_EXTRA_HORZ;
-  const int stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
-  const int rtile_offset = RESTORATION_TILE_OFFSET >> ss_y;
+  const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
 
-  const int tile_stripe =
-      (limits->v_start - tile_rect->top + rtile_offset) / stripe_height;
-  const int frame_stripe = tile_stripe0 + tile_stripe;
-
-  if (frame_stripe > 0) {
-    uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
-    for (int i = -RESTORATION_BORDER; i < 0; ++i) {
-      uint8_t *dst8 = data8_tl + i * data_stride;
-      memcpy(REAL_PTR(use_highbd, dst8),
-             rlbs->tmp_save_above[i + RESTORATION_BORDER], line_size);
-    }
+  uint8_t *data8_tl = data8 + data_x0 + limits->v_start * data_stride;
+  for (int i = -RESTORATION_BORDER; i < 0; ++i) {
+    uint8_t *dst8 = data8_tl + i * data_stride;
+    memcpy(REAL_PTR(use_highbd, dst8),
+           rlbs->tmp_save_above[i + RESTORATION_BORDER], line_size);
   }
 
-  const int stripe_bottom = limits->v_start + stripe_height;
+  const int stripe_bottom = limits->v_start + full_stripe_height;
   uint8_t *data8_bl = data8 + data_x0 + stripe_bottom * data_stride;
 
   for (int i = 0; i < RESTORATION_BORDER; ++i) {
@@ -359,6 +444,39 @@
     uint8_t *dst8 = data8_bl + i * data_stride;
     memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size);
   }
+
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+  if (!loop_filter_across_tiles_enabled) {
+    // Restore any pixels we overwrote at the left/right edge of this
+    // processing unit
+    // Note: We don't need to restore the corner pixels, even if we overwrote
+    // them in the equivalent place in setup_processing_stripe_boundary.
+    // Those pixels will already have been restored in the code above.
+    // Left and right columns come back from their own buffers, so a unit
+    // which touches both tile edges is restored correctly.
+    const int at_tile_left_border = (limits->h_start == tile_rect->left);
+    const int at_tile_right_border = (limits->h_end == tile_rect->right);
+    const int this_stripe_height =
+        AOMMIN(full_stripe_height, limits->v_end - limits->v_start);
+    const int border_height =
+        AOMMIN(full_stripe_height, this_stripe_height + RESTORATION_BORDER);
+
+    if (at_tile_left_border) {
+      uint8_t *dst8 = data8 + limits->h_start + limits->v_start * data_stride;
+      for (int j = -RESTORATION_BORDER; j < 0; j++)
+        restore_boundary_column(rlbs->tmp_save_left[j + RESTORATION_BORDER],
+                                dst8 + j, data_stride, border_height,
+                                use_highbd);
+    }
+
+    if (at_tile_right_border) {
+      uint8_t *dst8 = data8 + limits->h_end + limits->v_start * data_stride;
+      for (int j = 0; j < RESTORATION_BORDER; j++)
+        restore_boundary_column(rlbs->tmp_save_right[j], dst8 + j, data_stride,
+                                border_height, use_highbd);
+    }
+  }
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 }
 #endif
 
@@ -1256,7 +1353,10 @@
 #if CONFIG_STRIPED_LOOP_RESTORATION
     const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
     const AV1PixelRect *tile_rect, int tile_stripe0,
-#endif
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+    int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
     int ss_x, int ss_y, int highbd, int bit_depth, uint8_t *data8, int stride,
     uint8_t *dst8, int dst_stride, int32_t *tmpbuf) {
   RestorationType unit_rtype = rui->restoration_type;
@@ -1284,15 +1384,21 @@
   while (i < unit_h) {
     remaining_stripes.v_start = limits->v_start + i;
     int h = setup_processing_stripe_boundary(&remaining_stripes, rsb, tile_rect,
-                                             tile_stripe0, ss_y, highbd, data8,
-                                             stride, rlbs);
+                                             tile_stripe0, ss_y, highbd,
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+                                             loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+                                             data8, stride, rlbs);
 
     stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride,
                   dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth);
 
-    restore_processing_stripe_boundary(&remaining_stripes, rlbs, tile_rect,
-                                       tile_stripe0, ss_y, highbd, data8,
-                                       stride);
+    restore_processing_stripe_boundary(&remaining_stripes, rlbs, ss_y, highbd,
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+                                       tile_rect,
+                                       loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+                                       data8, stride);
 
     i += h;
   }
@@ -1303,7 +1409,7 @@
     stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride,
                   dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth);
   }
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 }
 
 typedef struct {
@@ -1312,7 +1418,7 @@
   RestorationLineBuffers *rlbs;
   const AV1_COMMON *cm;
   int tile_stripe0;
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
   int ss_x, ss_y;
   int highbd, bit_depth;
   uint8_t *data8, *dst8;
@@ -1329,7 +1435,7 @@
 #else
   (void)tile_row;
   (void)priv;
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 }
 
 static void filter_frame_on_unit(const RestorationTileLimits *limits,
@@ -1340,13 +1446,16 @@
 
 #if !CONFIG_STRIPED_LOOP_RESTORATION
   (void)tile_rect;
-#endif
+#endif  // !CONFIG_STRIPED_LOOP_RESTORATION
 
   av1_loop_restoration_filter_unit(
       limits, &rsi->unit_info[rest_unit_idx],
 #if CONFIG_STRIPED_LOOP_RESTORATION
       &rsi->boundaries, ctxt->rlbs, tile_rect, ctxt->tile_stripe0,
-#endif
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+      ctxt->cm->loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
       ctxt->ss_x, ctxt->ss_y, ctxt->highbd, ctxt->bit_depth, ctxt->data8,
       ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, ctxt->tmpbuf);
 }
@@ -1396,7 +1505,7 @@
 
 #if CONFIG_STRIPED_LOOP_RESTORATION
   RestorationLineBuffers rlbs;
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 #if CONFIG_HIGHBITDEPTH
   const int bit_depth = cm->bit_depth;
   const int highbd = cm->use_highbitdepth;
@@ -1427,7 +1536,7 @@
 #if CONFIG_STRIPED_LOOP_RESTORATION
     ctxt.rlbs = &rlbs;
     ctxt.cm = cm;
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
     ctxt.ss_x = is_uv && cm->subsampling_x;
     ctxt.ss_y = is_uv && cm->subsampling_y;
     ctxt.highbd = highbd;
@@ -1481,7 +1590,7 @@
     if (limits.v_end < tile_rect->bottom) limits.v_end -= voffset;
 #else
     (void)ss_y;
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 
     int x0 = 0, j = 0;
     while (x0 < tile_w) {
@@ -1667,10 +1776,10 @@
   }
 }
 
-static void save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
-                                const AV1_COMMON *cm, int plane, int row,
-                                int stripe, int use_highbd, int is_above,
-                                RestorationStripeBoundaries *boundaries) {
+static void save_deblock_boundary_lines(
+    const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm, int plane, int row,
+    int stripe, int use_highbd, int is_above,
+    RestorationStripeBoundaries *boundaries) {
   const int is_uv = plane > 0;
   const int src_width = frame->crop_widths[is_uv];
   const int src_height = frame->crop_heights[is_uv];
@@ -1714,39 +1823,130 @@
                RESTORATION_EXTRA_HORZ, use_highbd);
 }
 
+// Fill all RESTORATION_CTX_VERT context lines of one stripe boundary with
+// copies of CDEF row 'row' (the outermost row inside the tile), then pad the
+// lines horizontally. is_above picks the above or below boundary buffer.
+static void save_cdef_boundary_lines(const YV12_BUFFER_CONFIG *frame,
+                                     const AV1_COMMON *cm, int plane, int row,
+                                     int stripe, int use_highbd, int is_above,
+                                     RestorationStripeBoundaries *boundaries) {
+  const int is_uv = plane > 0;
+  const uint8_t *src_buf = REAL_PTR(use_highbd, frame->buffers[plane]);
+  const int src_stride = frame->strides[is_uv] << use_highbd;
+  const uint8_t *src_rows = src_buf + row * src_stride;
+
+  uint8_t *bdry_buf = is_above ? boundaries->stripe_boundary_above
+                               : boundaries->stripe_boundary_below;
+  uint8_t *bdry_start = bdry_buf + (RESTORATION_EXTRA_HORZ << use_highbd);
+  const int bdry_stride = boundaries->stripe_boundary_stride << use_highbd;
+  uint8_t *bdry_rows = bdry_start + RESTORATION_CTX_VERT * stripe * bdry_stride;
+
+#if CONFIG_FRAME_SUPERRES
+  // At the point where this function is called, we've already applied
+  // superres. So we don't need to extend the lines here, we can just
+  // pull directly from the topmost row of the upscaled frame.
+  const int ss_x = is_uv && cm->subsampling_x;
+  const int upscaled_width = (cm->superres_upscaled_width + ss_x) >> ss_x;
+#else
+  (void)cm;
+  const int src_width = frame->crop_widths[is_uv];
+  const int upscaled_width = src_width;
+#endif  // CONFIG_FRAME_SUPERRES
+  const int line_bytes = upscaled_width << use_highbd;
+  for (int i = 0; i < RESTORATION_CTX_VERT; i++) {
+    // Every context line is a copy of the single line at 'row': we extend
+    // the outermost CDEF row of this tile to form the border, so the count
+    // must not shrink when 'row' is the last row of the frame.
+    memcpy(bdry_rows + i * bdry_stride, src_rows, line_bytes);
+  }
+  extend_lines(bdry_rows, upscaled_width, RESTORATION_CTX_VERT, bdry_stride,
+               RESTORATION_EXTRA_HORZ, use_highbd);
+}
+
 static void save_tile_row_boundary_lines(const YV12_BUFFER_CONFIG *frame,
                                          int tile_row,
-                                         const AV1PixelRect *tile_rect,
+                                         const TileInfo *tile_info,
                                          int use_highbd, int plane,
-                                         AV1_COMMON *cm) {
+                                         AV1_COMMON *cm, int after_cdef) {
   const int is_uv = plane > 0;
   const int ss_y = is_uv && cm->subsampling_y;
   const int stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
   const int stripe_off = RESTORATION_TILE_OFFSET >> ss_y;
+  const AV1PixelRect tile_rect = av1_get_tile_rect(tile_info, cm, plane > 0);
 
   RestorationStripeBoundaries *boundaries = &cm->rst_info[plane].boundaries;
 
   const int stripe0 = (tile_row == 0) ? 0 : cm->rst_end_stripe[tile_row - 1];
 
+  int plane_height = cm->mi_rows << (MI_SIZE_LOG2 - ss_y);
+
   int tile_stripe;
   for (tile_stripe = 0;; ++tile_stripe) {
     const int rel_y0 = AOMMAX(0, tile_stripe * stripe_height - stripe_off);
-    const int y0 = tile_rect->top + rel_y0;
-    if (y0 >= tile_rect->bottom) break;
+    const int y0 = tile_rect.top + rel_y0;
+    if (y0 >= tile_rect.bottom) break;
 
     const int rel_y1 = (tile_stripe + 1) * stripe_height - stripe_off;
-    const int y1 = AOMMIN(tile_rect->top + rel_y1, tile_rect->bottom);
+    const int y1 = AOMMIN(tile_rect.top + rel_y1, tile_rect.bottom);
 
     const int frame_stripe = stripe0 + tile_stripe;
 
-    if (frame_stripe > 0) {
-      // Save RESTORATION_CTX_VERT lines above the stripe if frame_stripe > 0
-      save_boundary_lines(frame, cm, plane, y0 - RESTORATION_CTX_VERT,
-                          frame_stripe - 1, use_highbd, 1, boundaries);
+    int use_deblock_above, use_deblock_below;
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+    if (!cm->loop_filter_across_tiles_enabled) {
+// In this case, we should use CDEF pixels for the above context
+// of the topmost stripe in each tile, and for the below context
+// of the bottommost stripe in each tile.
+//
+// As a special case, when dependent-horztiles is enabled, we may be
+// allowed to use pixels from the tile above us. But we don't use pixels
+// from the tile below in that case, to match the behaviour of
+// av1_setup_across_tile_boundary_info()
+#if CONFIG_DEPENDENT_HORZTILES
+      if (cm->dependent_horz_tiles && !tile_info->tg_horz_boundary)
+        use_deblock_above = (frame_stripe > 0);
+      else
+#endif  // CONFIG_DEPENDENT_HORZTILES
+        use_deblock_above = (tile_stripe > 0);
+
+      use_deblock_below = (y1 < tile_rect.bottom);
+    } else {
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+      // In this case, we should only use CDEF pixels at the top
+      // and bottom of the frame as a whole; internal tile boundaries
+      // can use deblocked pixels from adjacent tiles for context.
+      use_deblock_above = (frame_stripe > 0);
+      use_deblock_below = (y1 < plane_height);
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
     }
-    // Always save RESTORATION_CTX_VERT lines below the LR stripe
-    save_boundary_lines(frame, cm, plane, y1, frame_stripe, use_highbd, 0,
-                        boundaries);
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+
+    if (!after_cdef) {
+      // First pass (after_cdef == 0): save deblocked context where needed.
+      if (use_deblock_above) {
+        save_deblock_boundary_lines(frame, cm, plane, y0 - RESTORATION_CTX_VERT,
+                                    frame_stripe, use_highbd, 1, boundaries);
+      }
+      if (use_deblock_below) {
+        save_deblock_boundary_lines(frame, cm, plane, y1, frame_stripe,
+                                    use_highbd, 0, boundaries);
+      }
+    } else {
+      // Second pass (after_cdef != 0): save CDEF context where needed. We
+      // save the CDEF context for a particular boundary iff we *didn't*
+      // save deblocked context for that boundary in the first pass.
+      //
+      // In addition, we need to save copies of the outermost line within
+      // the tile, rather than using data from outside the tile.
+      if (!use_deblock_above) {
+        save_cdef_boundary_lines(frame, cm, plane, y0, frame_stripe, use_highbd,
+                                 1, boundaries);
+      }
+      if (!use_deblock_below) {
+        save_cdef_boundary_lines(frame, cm, plane, y1 - 1, frame_stripe,
+                                 use_highbd, 0, boundaries);
+      }
+    }
   }
 }
 
@@ -1754,7 +1954,7 @@
 // lines to be used as boundary in the loop restoration process. The
 // lines are saved in rst_internal.stripe_boundary_lines
 void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
-                                              AV1_COMMON *cm) {
+                                              AV1_COMMON *cm, int after_cdef) {
 #if CONFIG_HIGHBITDEPTH
   const int use_highbd = cm->use_highbitdepth;
 #else
@@ -1765,9 +1965,8 @@
     TileInfo tile_info;
     for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
       av1_tile_init(&tile_info, cm, tile_row, 0);
-      AV1PixelRect tile_rect = av1_get_tile_rect(&tile_info, cm, p > 0);
-      save_tile_row_boundary_lines(frame, tile_row, &tile_rect, use_highbd, p,
-                                   cm);
+      save_tile_row_boundary_lines(frame, tile_row, &tile_info, use_highbd, p,
+                                   cm, after_cdef);
     }
   }
 }
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index b4ef92e..3bea5ed 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -29,20 +29,20 @@
 #if CONFIG_STRIPED_LOOP_RESTORATION
 // Filter tile grid offset upwards compared to the superblock grid
 #define RESTORATION_TILE_OFFSET 8
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 
 #if CONFIG_STRIPED_LOOP_RESTORATION
 #define SGRPROJ_BORDER_VERT 3  // Vertical border used for Sgr
 #else
 #define SGRPROJ_BORDER_VERT 3  // Vertical border used for Sgr
-#endif
+#endif                         // CONFIG_STRIPED_LOOP_RESTORATION
 #define SGRPROJ_BORDER_HORZ 3  // Horizontal border used for Sgr
 
 #if CONFIG_STRIPED_LOOP_RESTORATION
 #define WIENER_BORDER_VERT 2  // Vertical border used for Wiener
 #else
 #define WIENER_BORDER_VERT 3  // Vertical border used for Wiener
-#endif
+#endif                        // CONFIG_STRIPED_LOOP_RESTORATION
 #define WIENER_HALFWIN 3
 #define WIENER_BORDER_HORZ (WIENER_HALFWIN)  // Horizontal border for Wiener
 
@@ -72,7 +72,7 @@
 // Additional pixels to the left and right in above/below buffers
 // It is RESTORATION_BORDER_HORZ rounded up to get nicer buffer alignment
 #define RESTORATION_EXTRA_HORZ 4
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 
 // Pad up to 20 more (may be much less is needed)
 #define RESTORATION_PADDING 20
@@ -95,7 +95,7 @@
 #define RESTORATION_TILEPELS_MAX                                           \
   ((RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16) * \
    (RESTORATION_TILESIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_VERT))
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 
 // Two 32-bit buffers needed for the restored versions from two filters
 // TODO(debargha, rupert): Refactor to not need the large tilesize to be stored
@@ -230,6 +230,16 @@
   // stripe.
   uint16_t tmp_save_above[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH];
   uint16_t tmp_save_below[RESTORATION_BORDER][RESTORATION_LINEBUFFER_WIDTH];
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+  // Column buffers, for storing 3 pixels at the left/right of each tile
+  // when loopfiltering across tiles is disabled.
+  //
+  // Note: These arrays only need to store the pixels immediately left/right
+  // of each processing unit; the corner pixels (top-left, etc.) are always
+  // stored into the above/below arrays.
+  uint16_t tmp_save_left[RESTORATION_BORDER][RESTORATION_PROC_UNIT_SIZE];
+  uint16_t tmp_save_right[RESTORATION_BORDER][RESTORATION_PROC_UNIT_SIZE];
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 } RestorationLineBuffers;
 
 typedef struct {
@@ -237,7 +247,7 @@
   uint8_t *stripe_boundary_below;
   int stripe_boundary_stride;
 } RestorationStripeBoundaries;
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 
 typedef struct {
   RestorationType frame_restoration_type;
@@ -256,7 +266,7 @@
   RestorationUnitInfo *unit_info;
 #if CONFIG_STRIPED_LOOP_RESTORATION
   RestorationStripeBoundaries boundaries;
-#endif
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
 } RestorationInfo;
 
 static INLINE void set_default_sgrproj(SgrprojInfo *sgrproj_info) {
@@ -315,7 +325,10 @@
 #if CONFIG_STRIPED_LOOP_RESTORATION
     const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
     const AV1PixelRect *tile_rect, int tile_stripe0,
-#endif
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+    int loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+#endif  // CONFIG_STRIPED_LOOP_RESTORATION
     int ss_x, int ss_y, int highbd, int bit_depth, uint8_t *data8, int stride,
     uint8_t *dst8, int dst_stride, int32_t *tmpbuf);
 
@@ -355,7 +368,8 @@
                                        int *rrow1, int *tile_tl_idx);
 
 void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
-                                              struct AV1Common *cm);
+                                              struct AV1Common *cm,
+                                              int after_cdef);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index d46b5a1..d576bd9 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -3705,7 +3705,7 @@
   if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
       cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
       cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
-    av1_loop_restoration_save_boundary_lines(&pbi->cur_buf->buf, cm);
+    av1_loop_restoration_save_boundary_lines(&pbi->cur_buf->buf, cm, 0);
   }
 #endif
 
@@ -3723,6 +3723,7 @@
   if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
       cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
       cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
+    av1_loop_restoration_save_boundary_lines(&pbi->cur_buf->buf, cm, 1);
     av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf, cm,
                                       cm->rst_info, 7, NULL);
   }
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index fbc00eb..9ce276a 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4685,7 +4685,7 @@
 #if CONFIG_FRAME_SUPERRES && CONFIG_HORZONLY_FRAME_SUPERRES
   if (!av1_superres_unscaled(cm)) aom_extend_frame_borders(cm->frame_to_show);
 #endif
-  av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm);
+  av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 0);
 #endif
 
 #if CONFIG_CDEF
@@ -4708,6 +4708,7 @@
 #endif  // CONFIG_FRAME_SUPERRES
 
 #if CONFIG_LOOP_RESTORATION
+  av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 1);
   av1_pick_filter_restoration(cpi->source, cpi);
   if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
       cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index b51e1c7..b95ef6d 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -182,6 +182,9 @@
       limits, rui,
 #if CONFIG_STRIPED_LOOP_RESTORATION
       &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+      cm->loop_filter_across_tiles_enabled,
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
 #endif
       is_uv && cm->subsampling_x, is_uv && cm->subsampling_y, highbd, bit_depth,
       fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane],