Expose av1_loop_restoration_filter_unit in restoration.h

This patch also reorganizes the loop restoration coefficients,
grouping the information for a given restoration unit into a
structure called RestorationUnitInfo. With the per-unit state passed
explicitly, the RestorationInternal structure is no longer needed and
has been removed.

The copy_tile functions in restoration.c, together with those
functions that operate on a single stripe, have been changed so that
they take pointers to the top-left corner of the area on which they
should work, together with a width and height.

The same isn't true of av1_loop_restoration_filter_unit, which still
takes pointers to the top-left of the tile, with the unit's position
given by its limits argument. This is because striped loop restoration
needs the unit's absolute position in the tile to work properly.

Change-Id: I768c182cd15c9b2d6cfabb5ffca697cd2a3ff9e1
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index a7a2c5c..0bd26a7 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -162,8 +162,9 @@
     int buf_size = num_stripes * 2 * stride;
     uint8_t *above_buf, *below_buf;
 
-    aom_free(cm->rst_info[p].stripe_boundary_above);
-    aom_free(cm->rst_info[p].stripe_boundary_below);
+    RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries;
+    aom_free(boundaries->stripe_boundary_above);
+    aom_free(boundaries->stripe_boundary_below);
 
 #if CONFIG_HIGHBITDEPTH
     if (cm->use_highbitdepth) buf_size = buf_size * 2;
@@ -172,9 +173,9 @@
                     (uint8_t *)aom_memalign(1 << align_bits, buf_size));
     CHECK_MEM_ERROR(cm, below_buf,
                     (uint8_t *)aom_memalign(1 << align_bits, buf_size));
-    cm->rst_info[p].stripe_boundary_above = above_buf;
-    cm->rst_info[p].stripe_boundary_below = below_buf;
-    cm->rst_info[p].stripe_boundary_stride = stride;
+    boundaries->stripe_boundary_above = above_buf;
+    boundaries->stripe_boundary_below = below_buf;
+    boundaries->stripe_boundary_stride = stride;
   }
 #endif  // CONFIG_STRIPED_LOOP_RESTORATION
 }
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 3f68091..59b9514 100755
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -576,7 +576,7 @@
 # LOOP_RESTORATION functions
 
 if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
-  add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf";
+  add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf";
   specialize qw/apply_selfguided_restoration sse4_1/;
 
   add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
@@ -586,7 +586,7 @@
   specialize qw/av1_highpass_filter sse4_1/;
 
   if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
-    add_proto qw/void apply_selfguided_restoration_highbd/, "const uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf";
+    add_proto qw/void apply_selfguided_restoration_highbd/, "const uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, const int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf";
     specialize qw/apply_selfguided_restoration_highbd sse4_1/;
 
     add_proto qw/void av1_selfguided_restoration_highbd/, "const uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps";
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 01a427f..d8b2e1d 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -45,53 +45,20 @@
 #endif
 };
 
-typedef struct {
-  RestorationInfo *rsi;
-  int keyframe;
-  int ntiles, nhtiles, nvtiles;
-#if CONFIG_HIGHBITDEPTH
-  int bit_depth;
-#endif
-  int subsampling_y;
-  int32_t *tmpbuf;
-#if CONFIG_STRIPED_LOOP_RESTORATION
-  // Temporary buffers to save/restore 2 lines above/below the restoration
-  // stripe
-  // Allow for filter margin to left and right
-  uint16_t
-      tmp_save_above[2][RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ];
-  uint16_t
-      tmp_save_below[2][RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ];
-#endif
-} RestorationInternal;
-
 int av1_alloc_restoration_struct(AV1_COMMON *cm, RestorationInfo *rst_info,
                                  int width, int height) {
   const int ntiles = av1_get_rest_ntiles(
       width, height, rst_info->restoration_tilesize, NULL, NULL);
-  aom_free(rst_info->restoration_type);
-  CHECK_MEM_ERROR(cm, rst_info->restoration_type,
-                  (RestorationType *)aom_malloc(
-                      sizeof(*rst_info->restoration_type) * ntiles));
-  aom_free(rst_info->wiener_info);
+  aom_free(rst_info->unit_info);
   CHECK_MEM_ERROR(
-      cm, rst_info->wiener_info,
-      (WienerInfo *)aom_memalign(16, sizeof(*rst_info->wiener_info) * ntiles));
-  memset(rst_info->wiener_info, 0, sizeof(*rst_info->wiener_info) * ntiles);
-  aom_free(rst_info->sgrproj_info);
-  CHECK_MEM_ERROR(
-      cm, rst_info->sgrproj_info,
-      (SgrprojInfo *)aom_malloc(sizeof(*rst_info->sgrproj_info) * ntiles));
+      cm, rst_info->unit_info,
+      (RestorationUnitInfo *)aom_malloc(sizeof(*rst_info->unit_info) * ntiles));
   return ntiles;
 }
 
 void av1_free_restoration_struct(RestorationInfo *rst_info) {
-  aom_free(rst_info->restoration_type);
-  rst_info->restoration_type = NULL;
-  aom_free(rst_info->wiener_info);
-  rst_info->wiener_info = NULL;
-  aom_free(rst_info->sgrproj_info);
-  rst_info->sgrproj_info = NULL;
+  aom_free(rst_info->unit_info);
+  rst_info->unit_info = NULL;
 }
 
 // TODO(debargha): This table can be substantially reduced since only a few
@@ -165,39 +132,32 @@
   extend_frame_lowbd(data, width, height, stride, border_horz, border_vert);
 }
 
-static void copy_tile_lowbd(const RestorationTileLimits *limits,
-                            const uint8_t *src, int src_stride, uint8_t *dst,
-                            int dst_stride) {
-  for (int i = limits->v_start; i < limits->v_end; ++i)
-    memcpy(dst + i * dst_stride + limits->h_start,
-           src + i * src_stride + limits->h_start,
-           limits->h_end - limits->h_start);
+static void copy_tile_lowbd(int width, int height, const uint8_t *src,
+                            int src_stride, uint8_t *dst, int dst_stride) {
+  for (int i = 0; i < height; ++i)
+    memcpy(dst + i * dst_stride, src + i * src_stride, width);
 }
 
 #if CONFIG_HIGHBITDEPTH
-static void copy_tile_highbd(const RestorationTileLimits *limits,
-                             const uint16_t *src, int src_stride, uint16_t *dst,
-                             int dst_stride) {
-  for (int i = limits->v_start; i < limits->v_end; ++i)
-    memcpy(dst + i * dst_stride + limits->h_start,
-           src + i * src_stride + limits->h_start,
-           (limits->h_end - limits->h_start) * sizeof(*dst));
+static void copy_tile_highbd(int width, int height, const uint16_t *src,
+                             int src_stride, uint16_t *dst, int dst_stride) {
+  for (int i = 0; i < height; ++i)
+    memcpy(dst + i * dst_stride, src + i * src_stride, width * sizeof(*dst));
 }
 #endif
 
-static void copy_tile(const RestorationTileLimits *limits, const uint8_t *src,
-                      int src_stride, uint8_t *dst, int dst_stride,
-                      int highbd) {
+static void copy_tile(int width, int height, const uint8_t *src, int src_stride,
+                      uint8_t *dst, int dst_stride, int highbd) {
 #if !CONFIG_HIGHBITDEPTH
   assert(highbd == 0);
   (void)highbd;
 #else
   if (highbd)
-    copy_tile_highbd(limits, CONVERT_TO_SHORTPTR(src), src_stride,
+    copy_tile_highbd(width, height, CONVERT_TO_SHORTPTR(src), src_stride,
                      CONVERT_TO_SHORTPTR(dst), dst_stride);
   else
 #endif
-  copy_tile_lowbd(limits, src, src_stride, dst, dst_stride);
+  copy_tile_lowbd(width, height, src, src_stride, dst, dst_stride);
 }
 
 #if CONFIG_STRIPED_LOOP_RESTORATION
@@ -214,23 +174,26 @@
 // restore_processing_stripe_boundary.
 //
 // limits gives the rectangular limits of the remaining stripes for the current
-// restoration unit.
-static int setup_processing_stripe_boundary(const RestorationTileLimits *limits,
-                                            uint8_t *data8, int stride,
-                                            RestorationInternal *rst,
-                                            int use_highbd) {
+// restoration unit. rsb is the stored stripe boundaries (the saved output from
+// the deblocker). stripe_height is the height of each stripe. ss_y is true if
+// we're on a chroma plane with vertical subsampling. use_highbd is true if the
+// data has 2 bytes per pixel. rlbs contains scratch buffers to hold the CDEF
+// data (written back to the frame by restore_processing_stripe_boundary).
+static int setup_processing_stripe_boundary(
+    const RestorationTileLimits *limits, const RestorationStripeBoundaries *rsb,
+    int stripe_height, int ss_y, int use_highbd, uint8_t *data8, int stride,
+    RestorationLineBuffers *rlbs) {
   // Which stripe is this? limits->v_start is the top of the stripe in pixel
   // units, but we add tile_offset to get the number of pixels from the top of
   // the first stripe, which lies off the image.
-  const int tile_offset = RESTORATION_TILE_OFFSET >> rst->subsampling_y;
-  const int stripe_height = rst->rsi->procunit_height;
+  const int tile_offset = RESTORATION_TILE_OFFSET >> ss_y;
   const int stripe_index = (limits->v_start + tile_offset) / stripe_height;
 
   // Horizontal offsets within the line buffers. The buffer logically starts at
   // column -RESTORATION_EXTRA_HORZ. We'll start our copy from the column
   // limits->h_start - RESTORATION_EXTRA_HORZ and copy up to the column
   // limits->h_end + RESTORATION_EXTRA_HORZ.
-  const int buf_stride = rst->rsi->stripe_boundary_stride;
+  const int buf_stride = rsb->stripe_boundary_stride;
   const int buf_x0_off = limits->h_start;
   const int line_width =
       (limits->h_end - limits->h_start) + 2 * RESTORATION_EXTRA_HORZ;
@@ -247,11 +210,10 @@
 
     for (int i = 0; i < 2; ++i) {
       const int buf_off = buf_x0_off + (above_buf_y + i) * buf_stride;
-      const uint8_t *src =
-          rst->rsi->stripe_boundary_above + (buf_off << use_highbd);
+      const uint8_t *src = rsb->stripe_boundary_above + (buf_off << use_highbd);
       uint8_t *dst8 = data8_tl + i * stride;
       // Save old pixels, then replace with data from boundary_above_buf
-      memcpy(rst->tmp_save_above[i], REAL_PTR(use_highbd, dst8), line_size);
+      memcpy(rlbs->tmp_save_above[i], REAL_PTR(use_highbd, dst8), line_size);
       memcpy(REAL_PTR(use_highbd, dst8), src, line_size);
     }
   }
@@ -268,11 +230,10 @@
 
   for (int i = 0; i < rows_needed_below; ++i) {
     const int buf_off = buf_x0_off + (below_buf_y + i) * buf_stride;
-    const uint8_t *src =
-        rst->rsi->stripe_boundary_below + (buf_off << use_highbd);
+    const uint8_t *src = rsb->stripe_boundary_below + (buf_off << use_highbd);
     uint8_t *dst8 = data8_bl + i * stride;
     // Save old pixels, then replace with data from boundary_below_buf
-    memcpy(rst->tmp_save_below[i], REAL_PTR(use_highbd, dst8), line_size);
+    memcpy(rlbs->tmp_save_below[i], REAL_PTR(use_highbd, dst8), line_size);
     memcpy(REAL_PTR(use_highbd, dst8), src, line_size);
   }
 
@@ -283,10 +244,9 @@
 // This function restores the boundary lines modified by
 // setup_processing_stripe_boundary.
 static void restore_processing_stripe_boundary(
-    const RestorationTileLimits *limits, const RestorationInternal *rst,
-    uint8_t *data8, int stride, int use_highbd) {
-  const int tile_offset = RESTORATION_TILE_OFFSET >> rst->subsampling_y;
-  const int stripe_height = rst->rsi->procunit_height;
+    const RestorationTileLimits *limits, const RestorationLineBuffers *rlbs,
+    int stripe_height, int ss_y, int use_highbd, uint8_t *data8, int stride) {
+  const int tile_offset = RESTORATION_TILE_OFFSET >> ss_y;
   const int stripe_index = (limits->v_start + tile_offset) / stripe_height;
 
   const int line_width =
@@ -301,7 +261,7 @@
     for (int i = 0; i < 2; ++i) {
       uint8_t *dst8 = data8_tl + i * stride;
       // Save old pixels, then replace with data from boundary_above_buf
-      memcpy(REAL_PTR(use_highbd, dst8), rst->tmp_save_above[i], line_size);
+      memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_above[i], line_size);
     }
   }
 
@@ -313,7 +273,7 @@
   for (int i = 0; i < rows_needed_below; ++i) {
     uint8_t *dst8 = data8_bl + i * stride;
     // Save old pixels, then replace with data from boundary_below_buf
-    memcpy(REAL_PTR(use_highbd, dst8), rst->tmp_save_below[i], line_size);
+    memcpy(REAL_PTR(use_highbd, dst8), rlbs->tmp_save_below[i], line_size);
   }
 }
 #undef REAL_PTR
@@ -357,44 +317,46 @@
 #define wiener_convolve8_add_src aom_convolve8_add_src
 #endif
 
-static void wiener_filter_stripe(const RestorationTileLimits *limits,
-                                 const RestorationInternal *rst, int tile_idx,
-                                 int procunit_width, int stripe_height,
-                                 const uint8_t *src, int src_stride,
-                                 uint8_t *dst, int dst_stride) {
-  const RestorationInfo *rsi = rst->rsi;
+static void wiener_filter_stripe(const RestorationUnitInfo *rui,
+                                 int stripe_width, int stripe_height,
+                                 int procunit_width, const uint8_t *src,
+                                 int src_stride, uint8_t *dst, int dst_stride,
+                                 int32_t *tmpbuf, int bit_depth) {
+  (void)tmpbuf;
+  (void)bit_depth;
+  assert(bit_depth == 8);
+
   const int mid_height =
       stripe_height - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2;
   assert(mid_height > 0);
-  for (int j = limits->h_start; j < limits->h_end; j += procunit_width) {
-    int w = AOMMIN(procunit_width, (limits->h_end - j + 15) & ~15);
+  for (int j = 0; j < stripe_width; j += procunit_width) {
+    int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
     const uint8_t *src_p = src + j;
     uint8_t *dst_p = dst + j;
     for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) {
       InterpKernel vertical_top;
-      stepdown_wiener_kernel(rsi->wiener_info[tile_idx].vfilter, vertical_top,
+      stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_top,
                              WIENER_BORDER_VERT + b, 1);
       wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride,
-                               rsi->wiener_info[tile_idx].hfilter, 16,
-                               vertical_top, 16, w, 1);
+                               rui->wiener_info.hfilter, 16, vertical_top, 16,
+                               w, 1);
       src_p += src_stride;
       dst_p += dst_stride;
     }
 
     wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride,
-                             rsi->wiener_info[tile_idx].hfilter, 16,
-                             rsi->wiener_info[tile_idx].vfilter, 16, w,
-                             mid_height);
+                             rui->wiener_info.hfilter, 16,
+                             rui->wiener_info.vfilter, 16, w, mid_height);
     src_p += src_stride * mid_height;
     dst_p += dst_stride * mid_height;
 
     for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) {
       InterpKernel vertical_bot;
-      stepdown_wiener_kernel(rsi->wiener_info[tile_idx].vfilter, vertical_bot,
+      stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_bot,
                              WIENER_BORDER_VERT + b, 0);
       wiener_convolve8_add_src(src_p, src_stride, dst_p, dst_stride,
-                               rsi->wiener_info[tile_idx].hfilter, 16,
-                               vertical_bot, 16, w, 1);
+                               rui->wiener_info.hfilter, 16, vertical_bot, 16,
+                               w, 1);
       src_p += src_stride;
       dst_p += dst_stride;
     }
@@ -749,7 +711,7 @@
   }
 }
 
-void decode_xq(int *xqd, int *xq) {
+void decode_xq(const int *xqd, int *xq) {
   xq[0] = xqd[0];
   xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
 }
@@ -1091,8 +1053,9 @@
 }
 
 void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height,
-                                    int stride, int eps, int *xqd, uint8_t *dst,
-                                    int dst_stride, int32_t *tmpbuf) {
+                                    int stride, int eps, const int *xqd,
+                                    uint8_t *dst, int dst_stride,
+                                    int32_t *tmpbuf) {
   int xq[2];
   int32_t *flt1 = tmpbuf;
   int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
@@ -1124,19 +1087,19 @@
   }
 }
 
-static void sgrproj_filter_stripe(const RestorationTileLimits *limits,
-                                  const RestorationInternal *rst, int tile_idx,
-                                  int procunit_width, int stripe_height,
-                                  const uint8_t *src, int src_stride,
-                                  uint8_t *dst, int dst_stride) {
-  for (int j = limits->h_start; j < limits->h_end; j += procunit_width) {
-    int w = AOMMIN(procunit_width, limits->h_end - j);
-    const uint8_t *src_p = src + j;
-    uint8_t *dst_p = dst + j;
-    apply_selfguided_restoration(src_p, w, stripe_height, src_stride,
-                                 rst->rsi->sgrproj_info[tile_idx].ep,
-                                 rst->rsi->sgrproj_info[tile_idx].xqd, dst_p,
-                                 dst_stride, rst->tmpbuf);
+static void sgrproj_filter_stripe(const RestorationUnitInfo *rui,
+                                  int stripe_width, int stripe_height,
+                                  int procunit_width, const uint8_t *src,
+                                  int src_stride, uint8_t *dst, int dst_stride,
+                                  int32_t *tmpbuf, int bit_depth) {
+  (void)bit_depth;
+  assert(bit_depth == 8);
+
+  for (int j = 0; j < stripe_width; j += procunit_width) {
+    int w = AOMMIN(procunit_width, stripe_width - j);
+    apply_selfguided_restoration(src + j, w, stripe_height, src_stride,
+                                 rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
+                                 dst + j, dst_stride, tmpbuf);
   }
 }
 
@@ -1147,46 +1110,46 @@
 #define wiener_highbd_convolve8_add_src aom_highbd_convolve8_add_src
 #endif
 
-static void wiener_filter_stripe_highbd(const RestorationTileLimits *limits,
-                                        const RestorationInternal *rst,
-                                        int tile_idx, int procunit_width,
-                                        int stripe_height, const uint8_t *src8,
+static void wiener_filter_stripe_highbd(const RestorationUnitInfo *rui,
+                                        int stripe_width, int stripe_height,
+                                        int procunit_width, const uint8_t *src8,
                                         int src_stride, uint8_t *dst8,
-                                        int dst_stride) {
-  const RestorationInfo *rsi = rst->rsi;
+                                        int dst_stride, int32_t *tmpbuf,
+                                        int bit_depth) {
+  (void)tmpbuf;
+
   const int mid_height =
       stripe_height - (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2;
   assert(mid_height > 0);
 
-  for (int j = limits->h_start; j < limits->h_end; j += procunit_width) {
-    int w = AOMMIN(procunit_width, (limits->h_end - j + 15) & ~15);
+  for (int j = 0; j < stripe_width; j += procunit_width) {
+    int w = AOMMIN(procunit_width, (stripe_width - j + 15) & ~15);
     const uint8_t *src8_p = src8 + j;
     uint8_t *dst8_p = dst8 + j;
 
     for (int b = 0; b < WIENER_HALFWIN - WIENER_BORDER_VERT; ++b) {
       InterpKernel vertical_top;
-      stepdown_wiener_kernel(rsi->wiener_info[tile_idx].vfilter, vertical_top,
+      stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_top,
                              WIENER_BORDER_VERT + b, 1);
       wiener_highbd_convolve8_add_src(src8_p, src_stride, dst8_p, dst_stride,
-                                      rsi->wiener_info[tile_idx].hfilter, 16,
-                                      vertical_top, 16, w, 1, rst->bit_depth);
+                                      rui->wiener_info.hfilter, 16,
+                                      vertical_top, 16, w, 1, bit_depth);
       src8_p += src_stride;
       dst8_p += dst_stride;
     }
     assert(stripe_height > (WIENER_HALFWIN - WIENER_BORDER_VERT) * 2);
-    wiener_highbd_convolve8_add_src(src8_p, src_stride, dst8_p, dst_stride,
-                                    rsi->wiener_info[tile_idx].hfilter, 16,
-                                    rsi->wiener_info[tile_idx].vfilter, 16, w,
-                                    mid_height, rst->bit_depth);
+    wiener_highbd_convolve8_add_src(
+        src8_p, src_stride, dst8_p, dst_stride, rui->wiener_info.hfilter, 16,
+        rui->wiener_info.vfilter, 16, w, mid_height, bit_depth);
     src8_p += src_stride * (mid_height);
     dst8_p += dst_stride * (mid_height);
     for (int b = WIENER_HALFWIN - WIENER_BORDER_VERT - 1; b >= 0; --b) {
       InterpKernel vertical_bot;
-      stepdown_wiener_kernel(rsi->wiener_info[tile_idx].vfilter, vertical_bot,
+      stepdown_wiener_kernel(rui->wiener_info.vfilter, vertical_bot,
                              WIENER_BORDER_VERT + b, 0);
       wiener_highbd_convolve8_add_src(src8_p, src_stride, dst8_p, dst_stride,
-                                      rsi->wiener_info[tile_idx].hfilter, 16,
-                                      vertical_bot, 16, w, 1, rst->bit_depth);
+                                      rui->wiener_info.hfilter, 16,
+                                      vertical_bot, 16, w, 1, bit_depth);
       src8_p += src_stride;
       dst8_p += dst_stride;
     }
@@ -1304,9 +1267,9 @@
 
 void apply_selfguided_restoration_highbd_c(const uint16_t *dat, int width,
                                            int height, int stride,
-                                           int bit_depth, int eps, int *xqd,
-                                           uint16_t *dst, int dst_stride,
-                                           int32_t *tmpbuf) {
+                                           int bit_depth, int eps,
+                                           const int *xqd, uint16_t *dst,
+                                           int dst_stride, int32_t *tmpbuf) {
   int xq[2];
   int32_t *flt1 = tmpbuf;
   int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
@@ -1340,29 +1303,28 @@
   }
 }
 
-static void sgrproj_filter_stripe_highbd(const RestorationTileLimits *limits,
-                                         const RestorationInternal *rst,
-                                         int tile_idx, int procunit_width,
-                                         int stripe_height, const uint8_t *src8,
-                                         int src_stride, uint8_t *dst8,
-                                         int dst_stride) {
-  for (int j = limits->h_start; j < limits->h_end; j += procunit_width) {
-    int w = AOMMIN(procunit_width, limits->h_end - j);
+static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui,
+                                         int stripe_width, int stripe_height,
+                                         int procunit_width,
+                                         const uint8_t *src8, int src_stride,
+                                         uint8_t *dst8, int dst_stride,
+                                         int32_t *tmpbuf, int bit_depth) {
+  for (int j = 0; j < stripe_width; j += procunit_width) {
+    int w = AOMMIN(procunit_width, stripe_width - j);
     const uint16_t *data_p = CONVERT_TO_SHORTPTR(src8) + j;
     uint16_t *dst_p = CONVERT_TO_SHORTPTR(dst8) + j;
     apply_selfguided_restoration_highbd(
-        data_p, w, stripe_height, src_stride, rst->bit_depth,
-        rst->rsi->sgrproj_info[tile_idx].ep,
-        rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, dst_stride, rst->tmpbuf);
+        data_p, w, stripe_height, src_stride, bit_depth, rui->sgrproj_info.ep,
+        rui->sgrproj_info.xqd, dst_p, dst_stride, tmpbuf);
   }
 }
 #endif  // CONFIG_HIGHBITDEPTH
 
-typedef void (*stripe_filter_fun)(const RestorationTileLimits *limits,
-                                  const RestorationInternal *rst, int tile_idx,
-                                  int procunit_width, int stripe_height,
-                                  const uint8_t *src, int src_stride,
-                                  uint8_t *dst, int dst_stride);
+typedef void (*stripe_filter_fun)(const RestorationUnitInfo *rui,
+                                  int stripe_width, int stripe_height,
+                                  int procunit_width, const uint8_t *src,
+                                  int src_stride, uint8_t *dst, int dst_stride,
+                                  int32_t *tmpbuf, int bit_depth);
 
 #if CONFIG_HIGHBITDEPTH
 #define NUM_STRIPE_FILTERS 4
@@ -1377,17 +1339,29 @@
 #endif  // CONFIG_HIGHBITDEPTH
 };
 
-static void filter_rest_unit(const RestorationTileLimits *limits,
-                             RestorationInternal *rst, int tile_idx, int highbd,
-                             uint8_t *data8, int stride, uint8_t *dst8,
-                             int dst_stride) {
-  RestorationType tile_rtype = rst->rsi->restoration_type[tile_idx];
-  if (tile_rtype == RESTORE_NONE) {
-    copy_tile(limits, data8, stride, dst8, dst_stride, highbd);
+void av1_loop_restoration_filter_unit(const RestorationTileLimits *limits,
+                                      const RestorationUnitInfo *rui,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+                                      const RestorationStripeBoundaries *rsb,
+                                      RestorationLineBuffers *rlbs, int ss_y,
+#endif
+                                      int procunit_width, int procunit_height,
+                                      int highbd, int bit_depth, uint8_t *data8,
+                                      int stride, uint8_t *dst8, int dst_stride,
+                                      int32_t *tmpbuf) {
+  RestorationType unit_rtype = rui->restoration_type;
+
+  int unit_h = limits->v_end - limits->v_start;
+  int unit_w = limits->h_end - limits->h_start;
+  uint8_t *data8_tl = data8 + limits->v_start * stride + limits->h_start;
+  uint8_t *dst8_tl = dst8 + limits->v_start * dst_stride + limits->h_start;
+
+  if (unit_rtype == RESTORE_NONE) {
+    copy_tile(unit_w, unit_h, data8_tl, stride, dst8_tl, dst_stride, highbd);
     return;
   }
 
-  const int filter_idx = 2 * highbd + (tile_rtype == RESTORE_SGRPROJ);
+  const int filter_idx = 2 * highbd + (unit_rtype == RESTORE_SGRPROJ);
   assert(filter_idx < NUM_STRIPE_FILTERS);
   const stripe_filter_fun stripe_filter = stripe_filters[filter_idx];
 
@@ -1395,25 +1369,24 @@
 #if CONFIG_STRIPED_LOOP_RESTORATION
   RestorationTileLimits remaining_stripes = *limits;
 #endif
-  int i = limits->v_start;
-  while (i < limits->v_end) {
+  int i = 0;
+  while (i < unit_h) {
 #if CONFIG_STRIPED_LOOP_RESTORATION
-    remaining_stripes.v_start = i;
-    int h = setup_processing_stripe_boundary(&remaining_stripes, data8, stride,
-                                             rst, highbd);
-    if (tile_rtype == RESTORE_WIENER) h = ALIGN_POWER_OF_TWO(h, 1);
+    remaining_stripes.v_start = limits->v_start + i;
+    int h = setup_processing_stripe_boundary(&remaining_stripes, rsb,
+                                             procunit_height, ss_y, highbd,
+                                             data8, stride, rlbs);
+    if (unit_rtype == RESTORE_WIENER) h = ALIGN_POWER_OF_TWO(h, 1);
 #else
-    const int h =
-        AOMMIN(rst->rsi->procunit_height, (limits->v_end - i + 15) & ~15);
+    const int h = AOMMIN(procunit_height, (unit_h - i + 15) & ~15);
 #endif
 
-    stripe_filter(limits, rst, tile_idx, rst->rsi->procunit_width, h,
-                  data8 + i * stride, stride, dst8 + i * dst_stride,
-                  dst_stride);
+    stripe_filter(rui, unit_w, h, procunit_width, data8_tl + i * stride, stride,
+                  dst8_tl + i * dst_stride, dst_stride, tmpbuf, bit_depth);
 
 #if CONFIG_STRIPED_LOOP_RESTORATION
-    restore_processing_stripe_boundary(&remaining_stripes, rst, data8, stride,
-                                       highbd);
+    restore_processing_stripe_boundary(
+        &remaining_stripes, rlbs, procunit_height, ss_y, highbd, data8, stride);
 #endif
 
     i += h;
@@ -1431,27 +1404,10 @@
   { RESTORATION_BORDER_HORZ, RESTORATION_BORDER_VERT }
 };
 
-static void filter_frame(int width, int height, RestorationType frame_rtype,
-                         int highbd, uint8_t *data8, int stride, uint8_t *dst8,
-                         int dst_stride, RestorationInternal *rst) {
-  const struct restore_borders *borders = &restore_borders[frame_rtype];
-
-  extend_frame(data8, width, height, stride, borders->hborder, borders->vborder,
-               highbd);
-
-  for (int tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
-    RestorationTileLimits limits = av1_get_rest_tile_limits(
-        tile_idx, rst->nhtiles, rst->nvtiles, rst->rsi->restoration_tilesize,
-        width, height, rst->subsampling_y);
-
-    filter_rest_unit(&limits, rst, tile_idx, highbd, data8, stride, dst8,
-                     dst_stride);
-  }
-}
-
-void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
-                                RestorationInfo *rsi, int components_pattern,
-                                YV12_BUFFER_CONFIG *dst) {
+void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
+                                       AV1_COMMON *cm, RestorationInfo *rsi,
+                                       int components_pattern,
+                                       YV12_BUFFER_CONFIG *dst) {
   YV12_BUFFER_CONFIG dst_;
 
   typedef void (*copy_fun)(const YV12_BUFFER_CONFIG *src,
@@ -1491,11 +1447,21 @@
                          "Failed to allocate restoration dst buffer");
   }
 
-  RestorationInternal rst;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+  RestorationLineBuffers rlbs;
+#endif
+#if CONFIG_HIGHBITDEPTH
+  const int bit_depth = cm->bit_depth;
+  const int highbd = cm->use_highbitdepth;
+#else
+  const int bit_depth = 8;
+  const int highbd = 0;
+#endif
+
   for (int plane = 0; plane < 3; ++plane) {
     if (!((components_pattern >> plane) & 1)) continue;
-
-    RestorationType rtype = rsi[plane].frame_restoration_type;
+    const RestorationInfo *prsi = &rsi[plane];
+    RestorationType rtype = prsi->frame_restoration_type;
     if (rtype == RESTORE_NONE) {
       copy_funs[plane](frame, dst);
       continue;
@@ -1507,23 +1473,31 @@
     const int plane_width = frame->crop_widths[is_uv];
     const int plane_height = frame->crop_heights[is_uv];
 
-    rst.rsi = &rsi[plane];
-    rst.keyframe = cm->frame_type == KEY_FRAME;
-    rst.ntiles = av1_get_rest_ntiles(plane_width, plane_height,
-                                     rst.rsi->restoration_tilesize,
-                                     &rst.nhtiles, &rst.nvtiles);
-    rst.subsampling_y = ss_y;
-    rst.tmpbuf = cm->rst_tmpbuf;
-#if CONFIG_HIGHBITDEPTH
-    rst.bit_depth = cm->bit_depth;
-    const int highbd = cm->use_highbitdepth;
-#else
-    const int highbd = 0;
-#endif
+    int nhtiles, nvtiles;
+    const int ntiles =
+        av1_get_rest_ntiles(plane_width, plane_height,
+                            prsi->restoration_tilesize, &nhtiles, &nvtiles);
 
-    filter_frame(plane_width, plane_height, rtype, highbd,
-                 frame->buffers[plane], frame->strides[is_uv],
-                 dst->buffers[plane], dst->strides[is_uv], &rst);
+    const struct restore_borders *borders =
+        &restore_borders[prsi->frame_restoration_type];
+    extend_frame(frame->buffers[plane], plane_width, plane_height,
+                 frame->strides[is_uv], borders->hborder, borders->vborder,
+                 highbd);
+
+    for (int tile_idx = 0; tile_idx < ntiles; ++tile_idx) {
+      RestorationTileLimits limits = av1_get_rest_tile_limits(
+          tile_idx, nhtiles, nvtiles, prsi->restoration_tilesize, plane_width,
+          plane_height, ss_y);
+
+      av1_loop_restoration_filter_unit(
+          &limits, &prsi->unit_info[tile_idx],
+#if CONFIG_STRIPED_LOOP_RESTORATION
+          &prsi->boundaries, &rlbs, ss_y,
+#endif
+          prsi->procunit_width, prsi->procunit_height, highbd, bit_depth,
+          frame->buffers[plane], frame->strides[is_uv], dst->buffers[plane],
+          dst->strides[is_uv], cm->rst_tmpbuf);
+    }
   }
 
   if (dst == &dst_) {
@@ -1627,7 +1601,7 @@
 // For each 64 pixel high stripe, save 4 scan lines to be used as boundary in
 // the loop restoration process. The lines are saved in
 // rst_internal.stripe_boundary_lines
-void av1_loop_restoration_save_boundary_lines(YV12_BUFFER_CONFIG *frame,
+void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
                                               AV1_COMMON *cm) {
   for (int p = 0; p < MAX_MB_PLANE; ++p) {
     const int is_uv = p > 0;
@@ -1638,9 +1612,10 @@
     const int stripe_height = 64 >> (is_uv && cm->subsampling_y);
     const int stripe_offset = (56 >> (is_uv && cm->subsampling_y)) - 2;
 
-    uint8_t *boundary_above_buf = cm->rst_info[p].stripe_boundary_above;
-    uint8_t *boundary_below_buf = cm->rst_info[p].stripe_boundary_below;
-    const int boundary_stride = cm->rst_info[p].stripe_boundary_stride;
+    RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries;
+    uint8_t *boundary_above_buf = boundaries->stripe_boundary_above;
+    uint8_t *boundary_below_buf = boundaries->stripe_boundary_below;
+    const int boundary_stride = boundaries->stripe_boundary_stride;
 #if CONFIG_HIGHBITDEPTH
     const int use_highbitdepth = cm->use_highbitdepth;
     if (use_highbitdepth) {
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index 161654d..eddb7ff 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -207,18 +207,38 @@
 } sgr_params_type;
 
 typedef struct {
-  int restoration_tilesize;
-  int procunit_width, procunit_height;
-  RestorationType frame_restoration_type;
-  RestorationType *restoration_type;
-  // Wiener filter
-  WienerInfo *wiener_info;
-  // Selfguided proj filter
-  SgrprojInfo *sgrproj_info;
+  RestorationType restoration_type;
+  WienerInfo wiener_info;
+  SgrprojInfo sgrproj_info;
+} RestorationUnitInfo;
+
 #if CONFIG_STRIPED_LOOP_RESTORATION
+// A restoration line buffer needs space for two lines plus a horizontal filter
+// margin of RESTORATION_EXTRA_HORZ on each side.
+#define RESTORATION_LINEBUFFER_WIDTH \
+  (RESTORATION_TILESIZE_MAX + 2 * RESTORATION_EXTRA_HORZ)
+
+typedef struct {
+  // Temporary buffers to save/restore 2 lines above/below the restoration
+  // stripe.
+  uint16_t tmp_save_above[2][RESTORATION_LINEBUFFER_WIDTH];
+  uint16_t tmp_save_below[2][RESTORATION_LINEBUFFER_WIDTH];
+} RestorationLineBuffers;
+
+typedef struct {
   uint8_t *stripe_boundary_above;
   uint8_t *stripe_boundary_below;
   int stripe_boundary_stride;
+} RestorationStripeBoundaries;
+#endif
+
+typedef struct {
+  RestorationType frame_restoration_type;
+  int restoration_tilesize;
+  int procunit_width, procunit_height;
+  RestorationUnitInfo *unit_info;
+#if CONFIG_STRIPED_LOOP_RESTORATION
+  RestorationStripeBoundaries boundaries;
 #endif
 } RestorationInfo;
 
@@ -290,10 +310,43 @@
 
 void extend_frame(uint8_t *data, int width, int height, int stride,
                   int border_horz, int border_vert, int highbd);
-void decode_xq(int *xqd, int *xq);
-void av1_loop_restoration_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
-                                RestorationInfo *rsi, int components_pattern,
-                                YV12_BUFFER_CONFIG *dst);
+void decode_xq(const int *xqd, int *xq);
+
+// Filter a single loop restoration unit.
+//
+// limits is the limits of the unit. rui gives the mode to use for this unit
+// and its coefficients. If striped loop restoration is enabled, rsb contains
+// deblocked pixels to use for stripe boundaries; rlbs is just some space to
+// use as a scratch buffer. ss_y is a flag which should be 1 if this is a plane
+// with vertical subsampling.
+//
+// procunit_width and procunit_height are the width and height in which to
+// process the data. highbd is a flag which should be 1 in high bit depth mode,
+// in which case bit_depth is the bit depth.
+//
+// data8 is the frame data (pointing at the top-left corner of the frame, not
+// the restoration unit) and stride is its stride. dst8 is the buffer where the
+// results will be written and has stride dst_stride. Like data8, dst8 should
+// point at the top-left corner of the frame.
+//
+// Finally tmpbuf is a scratch buffer used by the sgrproj filter which should
+// be at least SGRPROJ_TMPBUF_SIZE big.
+void av1_loop_restoration_filter_unit(const RestorationTileLimits *limits,
+                                      const RestorationUnitInfo *rui,
+#if CONFIG_STRIPED_LOOP_RESTORATION
+                                      const RestorationStripeBoundaries *rsb,
+                                      RestorationLineBuffers *rlbs, int ss_y,
+#endif
+                                      int procunit_width, int procunit_height,
+                                      int highbd, int bit_depth, uint8_t *data8,
+                                      int stride, uint8_t *dst8, int dst_stride,
+                                      int32_t *tmpbuf);
+
+void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
+                                       struct AV1Common *cm,
+                                       RestorationInfo *rsi,
+                                       int components_pattern,
+                                       YV12_BUFFER_CONFIG *dst);
 void av1_loop_restoration_precal();
 
 // Return 1 iff the block at mi_row, mi_col with size bsize is a
@@ -310,7 +363,7 @@
                                        int *rcol0, int *rcol1, int *rrow0,
                                        int *rrow1, int *nhtiles);
 
-void av1_loop_restoration_save_boundary_lines(YV12_BUFFER_CONFIG *frame,
+void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
                                               struct AV1Common *cm);
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/common/x86/selfguided_sse4.c b/av1/common/x86/selfguided_sse4.c
index 8626c01..3e66eb0 100644
--- a/av1/common/x86/selfguided_sse4.c
+++ b/av1/common/x86/selfguided_sse4.c
@@ -1058,8 +1058,8 @@
 
 void apply_selfguided_restoration_sse4_1(const uint8_t *dat, int width,
                                          int height, int stride, int eps,
-                                         int *xqd, uint8_t *dst, int dst_stride,
-                                         int32_t *tmpbuf) {
+                                         const int *xqd, uint8_t *dst,
+                                         int dst_stride, int32_t *tmpbuf) {
   int xq[2];
   int32_t *flt1 = tmpbuf;
   int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
@@ -1740,7 +1740,7 @@
 
 void apply_selfguided_restoration_highbd_sse4_1(
     const uint16_t *dat, int width, int height, int stride, int bit_depth,
-    int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf) {
+    int eps, const int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf) {
   int xq[2];
   int32_t *flt1 = tmpbuf;
   int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;