Clean up SGR code and make consistent

Change-Id: I99c0cd287d154acc5063c92eb3ad4035bff8dad7
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index 1fca124..f308fef 100755 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl
@@ -466,7 +466,7 @@ add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd"; specialize qw/apply_selfguided_restoration sse4_1 avx2/; - add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *flt1, int32_t *flt2, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd"; + add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *flt0, int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd"; specialize qw/av1_selfguided_restoration sse4_1 avx2/; }
diff --git a/av1/common/restoration.c b/av1/common/restoration.c index e861243..fac9ba8 100644 --- a/av1/common/restoration.c +++ b/av1/common/restoration.c
@@ -747,11 +747,11 @@ #if CONFIG_SKIP_SGR void decode_xq(const int *xqd, int *xq, const sgr_params_type *params) { - if (params->r1 == 0) { + if (params->r0 == 0) { assert(xqd[0] == 0); xq[0] = 0; xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1]; - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { assert(xqd[1] == 0); xq[0] = xqd[0]; xq[1] = 0; @@ -1051,7 +1051,7 @@ } void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height, - int dgd_stride, int32_t *flt1, int32_t *flt2, + int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd) { int32_t dgd32_[RESTORATION_PROC_UNIT_PELS]; @@ -1078,45 +1078,45 @@ // If params->r == 0 we skip the corresponding filter. We only allow one of // the radii to be 0, as having both equal to 0 would be equivalent to // skipping SGR entirely. - assert(!(params->r1 == 0 && params->r2 == 0)); + assert(!(params->r0 == 0 && params->r1 == 0)); #if CONFIG_FAST_SGR - if (params->r1 > 0) + if (params->r0 > 0) av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, - flt1, flt_stride, bit_depth, - params->r1, params->e1); - if (params->r2 > 0) - av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, - flt2, flt_stride, bit_depth, params->r2, - params->e2); -#else + flt0, flt_stride, bit_depth, + params->r0, params->e0); if (params->r1 > 0) av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, flt_stride, bit_depth, params->r1, params->e1); - - if (params->r2 > 0) +#else + if (params->r0 > 0) av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, - flt2, flt_stride, bit_depth, params->r2, - params->e2); + flt0, flt_stride, bit_depth, params->r0, + params->e0); + + if (params->r1 > 0) + av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, + flt1, flt_stride, bit_depth, params->r1, + params->e1); #endif // CONFIG_FAST_SGR #else // CONFIG_SKIP_SGR #if CONFIG_FAST_SGR // 
r == 2 filter av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, - flt1, flt_stride, bit_depth, - params->r1, params->e1); + flt0, flt_stride, bit_depth, + params->r0, params->e0); // r == 1 filter - av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2, - flt_stride, bit_depth, params->r2, - params->e2); -#else av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, flt_stride, bit_depth, params->r1, params->e1); - av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2, - flt_stride, bit_depth, params->r2, - params->e2); +#else + av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt0, + flt_stride, bit_depth, params->r0, + params->e0); + av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, + flt_stride, bit_depth, params->r1, + params->e1); #endif // CONFIG_FAST_SGR #endif // CONFIG_SKIP_SGR } @@ -1126,18 +1126,18 @@ uint8_t *dst8, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd) { - int32_t *flt1 = tmpbuf; - int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt0 = tmpbuf; + int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX; assert(width * height <= RESTORATION_TILEPELS_MAX); #if CONFIG_SKIP_SGR const sgr_params_type *params = &sgr_params[eps]; - av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width, + av1_selfguided_restoration_c(dat8, width, height, stride, flt0, flt1, width, params, bit_depth, highbd); int xq[2]; decode_xq(xqd, xq, params); #else // CONFIG_SKIP_SGR - av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width, + av1_selfguided_restoration_c(dat8, width, height, stride, flt0, flt1, width, &sgr_params[eps], bit_depth, highbd); int xq[2]; decode_xq(xqd, xq); @@ -1154,11 +1154,11 @@ int32_t v = u << SGRPROJ_PRJ_BITS; // If params->r == 0 then we skipped the filtering in // av1_selfguided_restoration_c, i.e. 
flt[k] == u - if (params->r1 > 0) v += xq[0] * (flt1[k] - u); - if (params->r2 > 0) v += xq[1] * (flt2[k] - u); + if (params->r0 > 0) v += xq[0] * (flt0[k] - u); + if (params->r1 > 0) v += xq[1] * (flt1[k] - u); #else // CONFIG_SKIP_SGR - const int32_t f1 = flt1[k] - u; - const int32_t f2 = flt2[k] - u; + const int32_t f1 = flt0[k] - u; + const int32_t f2 = flt1[k] - u; const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); #endif // CONFIG_SKIP_SGR const int16_t w =
diff --git a/av1/common/restoration.h b/av1/common/restoration.h index 520cfa5..e3a047e 100644 --- a/av1/common/restoration.h +++ b/av1/common/restoration.h
@@ -176,10 +176,10 @@ #endif typedef struct { + int r0; + int e0; int r1; int e1; - int r2; - int e2; } sgr_params_type; typedef struct {
diff --git a/av1/common/x86/selfguided_avx2.c b/av1/common/x86/selfguided_avx2.c index bc82bec..17be157 100644 --- a/av1/common/x86/selfguided_avx2.c +++ b/av1/common/x86/selfguided_avx2.c
@@ -526,8 +526,8 @@ #endif void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height, - int dgd_stride, int32_t *flt1, - int32_t *flt2, int flt_stride, + int dgd_stride, int32_t *flt0, + int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd) { // The ALIGN_POWER_OF_TWO macro here ensures that column 1 of Atl, Btl, @@ -583,36 +583,36 @@ integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl, buf_stride); -// Write to flt1 and flt2 +// Write to flt0 and flt1 #if CONFIG_SKIP_SGR // If params->r == 0 we skip the corresponding filter. We only allow one of // the radii to be 0, as having both equal to 0 would be equivalent to // skipping SGR entirely. - assert(!(params->r1 == 0 && params->r2 == 0)); + assert(!(params->r0 == 0 && params->r1 == 0)); #if CONFIG_FAST_SGR + assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - if (params->r1 > 0) { - calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, - params->r1); - final_filter_fast(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, + if (params->r0 > 0) { + calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth, + params->r0); + final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); } - if (params->r2 > 0) { - calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth, - params->r2); - final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + if (params->r1 > 0) { + calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, + params->r1); + final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); } #else // CONFIG_FAST_SGR for (int i = 0; i < 2; ++i) { - int r = i ? params->r2 : params->r1; - int e = i ? 
params->e2 : params->e1; + int r = i ? params->r1 : params->r0; + int e = i ? params->e1 : params->e0; if (r == 0) continue; - int32_t *flt = i ? flt2 : flt1; + int32_t *flt = i ? flt1 : flt0; assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); @@ -623,26 +623,26 @@ #endif // CONFIG_FAST_SGR #else // CONFIG_SKIP_SGR #if CONFIG_FAST_SGR - assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); + assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); // r == 2 filter - assert(params->r1 == 2); - calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, - params->r1); - final_filter_fast(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + assert(params->r0 == 2); + calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth, + params->r0); + final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); // r == 1 filter - assert(params->r2 == 1); - calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth, - params->r2); - final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + assert(params->r1 == 1); + calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, + params->r1); + final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); #else // CONFIG_FAST_SGR for (int i = 0; i < 2; ++i) { - int r = i ? params->r2 : params->r1; - int e = i ? params->e2 : params->e1; - int32_t *flt = i ? flt2 : flt1; + int r = i ? params->r1 : params->r0; + int e = i ? params->e1 : params->e0; + int32_t *flt = i ? 
flt1 : flt0; assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); @@ -659,17 +659,17 @@ const int *xqd, uint8_t *dst8, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd) { - int32_t *flt1 = tmpbuf; - int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt0 = tmpbuf; + int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX; assert(width * height <= RESTORATION_TILEPELS_MAX); #if CONFIG_SKIP_SGR const sgr_params_type *params = &sgr_params[eps]; - av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2, + av1_selfguided_restoration_avx2(dat8, width, height, stride, flt0, flt1, width, params, bit_depth, highbd); int xq[2]; decode_xq(xqd, xq, params); #else // CONFIG_SKIP_SGR - av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2, + av1_selfguided_restoration_avx2(dat8, width, height, stride, flt0, flt1, width, &sgr_params[eps], bit_depth, highbd); int xq[2]; decode_xq(xqd, xq); @@ -705,27 +705,27 @@ __m256i v_0 = _mm256_slli_epi32(u_0, SGRPROJ_PRJ_BITS); __m256i v_1 = _mm256_slli_epi32(u_1, SGRPROJ_PRJ_BITS); - if (params->r1 > 0) { - const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); + if (params->r0 > 0) { + const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0); v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq0, f1_0)); - const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); + const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1); v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq0, f1_1)); } - if (params->r2 > 0) { - const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt2[k]), u_0); + if (params->r1 > 0) { + const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq1, f2_0)); - const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt2[k + 8]), u_1); + const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq1, f2_1)); } 
#else // CONFIG_SKIP_SGR - const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); - const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); + const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0); + const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1); - const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt2[k]), u_0); - const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt2[k + 8]), u_1); + const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); + const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); const __m256i v_0 = _mm256_add_epi32(_mm256_add_epi32(_mm256_mullo_epi32(xq0, f1_0),
diff --git a/av1/common/x86/selfguided_sse4.c b/av1/common/x86/selfguided_sse4.c index 8cbb84f..5042854 100644 --- a/av1/common/x86/selfguided_sse4.c +++ b/av1/common/x86/selfguided_sse4.c
@@ -431,10 +431,10 @@ // The final filter for the FAST_SGR self-guided restoration. Computes a // weighted average across A, B with "cross sums" (see cross_sum_... // implementations above). -static void final_filter_fast2(int32_t *dst, int dst_stride, const int32_t *A, - const int32_t *B, int buf_stride, - const void *dgd8, int dgd_stride, int width, - int height, int highbd) { +static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A, + const int32_t *B, int buf_stride, + const void *dgd8, int dgd_stride, int width, + int height, int highbd) { const int nb0 = 5; const int nb1 = 4; @@ -486,7 +486,7 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width, int height, int dgd_stride, - int32_t *flt1, int32_t *flt2, + int32_t *flt0, int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd) { @@ -538,36 +538,36 @@ integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl, buf_stride); -// Write to flt1 and flt2 +// Write to flt0 and flt1 #if CONFIG_SKIP_SGR // If params->r == 0 we skip the corresponding filter. We only allow one of // the radii to be 0, as having both equal to 0 would be equivalent to // skipping SGR entirely. 
- assert(!(params->r1 == 0 && params->r2 == 0)); + assert(!(params->r0 == 0 && params->r1 == 0)); #if CONFIG_FAST_SGR + assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - if (params->r1 > 0) { - calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, - params->r1); - final_filter_fast2(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, - width, height, highbd); + if (params->r0 > 0) { + calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth, + params->r0); + final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, + width, height, highbd); } - if (params->r2 > 0) { - calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth, - params->r2); - final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + if (params->r1 > 0) { + calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, + params->r1); + final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); } #else // CONFIG_FAST_SGR for (int i = 0; i < 2; ++i) { - int r = i ? params->r2 : params->r1; - int e = i ? params->e2 : params->e1; + int r = i ? params->r1 : params->r0; + int e = i ? params->e1 : params->e0; if (r == 0) continue; - int32_t *flt = i ? flt2 : flt1; + int32_t *flt = i ? 
flt1 : flt0; assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); @@ -578,26 +578,26 @@ #endif // CONFIG_FAST_SGR #else // CONFIG_SKIP_SGR #if CONFIG_FAST_SGR - assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); + assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); // r == 2 filter - assert(params->r1 == 2); - calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, - params->r1); - final_filter_fast2(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, - width, height, highbd); + assert(params->r0 == 2); + calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth, + params->r0); + final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + height, highbd); // r == 1 filter - assert(params->r2 == 1); - calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth, - params->r2); - final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + assert(params->r1 == 1); + calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, + params->r1); + final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); #else // CONFIG_FAST_SGR for (int i = 0; i < 2; ++i) { - int r = i ? params->r2 : params->r1; - int e = i ? params->e2 : params->e1; - int32_t *flt = i ? flt2 : flt1; + int r = i ? params->r1 : params->r0; + int e = i ? params->e1 : params->e0; + int32_t *flt = i ? 
flt1 : flt0; assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); @@ -614,17 +614,17 @@ const int *xqd, uint8_t *dst8, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd) { - int32_t *flt1 = tmpbuf; - int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt0 = tmpbuf; + int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX; assert(width * height <= RESTORATION_TILEPELS_MAX); #if CONFIG_SKIP_SGR const sgr_params_type *params = &sgr_params[eps]; - av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2, + av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt0, flt1, width, params, bit_depth, highbd); int xq[2]; decode_xq(xqd, xq, params); #else // CONFIG_SKIP_SGR - av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2, + av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt0, flt1, width, &sgr_params[eps], bit_depth, highbd); int xq[2]; decode_xq(xqd, xq); @@ -655,26 +655,26 @@ __m128i v_0 = _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS); __m128i v_1 = _mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS); - if (params->r1 > 0) { - const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); + if (params->r0 > 0) { + const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0); v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq0, f1_0)); - const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); + const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1); v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq0, f1_1)); } - if (params->r2 > 0) { - const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt2[k]), u_0); + if (params->r1 > 0) { + const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq1, f2_0)); - const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt2[k + 4]), u_1); + const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq1, f2_1)); } #else // CONFIG_SKIP_SGR - const __m128i f1_0 = 
_mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); - const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt2[k]), u_0); - const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); - const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt2[k + 4]), u_1); + const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0); + const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); + const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1); + const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); const __m128i v_0 = _mm_add_epi32( _mm_add_epi32(_mm_mullo_epi32(xq0, f1_0), _mm_mullo_epi32(xq1, f2_0)),
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index d3fa2ff..b546931 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c
@@ -917,14 +917,14 @@ sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR); const sgr_params_type *params = &sgr_params[sgrproj_info->ep]; - if (params->r1 == 0) { + if (params->r0 == 0) { sgrproj_info->xqd[0] = 0; sgrproj_info->xqd[1] = aom_read_primitive_refsubexpfin( rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) + SGRPROJ_PRJ_MIN1; - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { sgrproj_info->xqd[0] = aom_read_primitive_refsubexpfin( rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c index 21e23ea..99dd67a 100644 --- a/av1/encoder/bitstream.c +++ b/av1/encoder/bitstream.c
@@ -2194,13 +2194,13 @@ aom_write_literal(wb, sgrproj_info->ep, SGRPROJ_PARAMS_BITS); const sgr_params_type *params = &sgr_params[sgrproj_info->ep]; - if (params->r1 == 0) { + if (params->r0 == 0) { assert(sgrproj_info->xqd[0] == 0); aom_write_primitive_refsubexpfin( wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1); - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { aom_write_primitive_refsubexpfin( wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c index 5d1d269..13c40ba 100644 --- a/av1/encoder/pickrst.c +++ b/av1/encoder/pickrst.c
@@ -178,8 +178,8 @@ static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int use_highbitdepth, - int32_t *flt1, int flt1_stride, - int32_t *flt2, int flt2_stride, int *xqd + int32_t *flt0, int flt0_stride, + int32_t *flt1, int flt1_stride, int *xqd #if CONFIG_SKIP_SGR , const sgr_params_type *params @@ -202,11 +202,11 @@ (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS); #if CONFIG_SKIP_SGR int32_t v = u << SGRPROJ_PRJ_BITS; - if (params->r1 > 0) v += xq[0] * (flt1[i * flt1_stride + j] - u); - if (params->r2 > 0) v += xq[1] * (flt2[i * flt2_stride + j] - u); + if (params->r0 > 0) v += xq[0] * (flt0[i * flt0_stride + j] - u); + if (params->r1 > 0) v += xq[1] * (flt1[i * flt1_stride + j] - u); #else // CONFIG_SKIP_SGR - const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u; - const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u; + const int32_t f1 = (int32_t)flt0[i * flt0_stride + j] - u; + const int32_t f2 = (int32_t)flt1[i * flt1_stride + j] - u; const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); #endif // CONFIG_SKIP_SGR const int32_t e = @@ -224,11 +224,11 @@ (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS); #if CONFIG_SKIP_SGR int32_t v = u << SGRPROJ_PRJ_BITS; - if (params->r1 > 0) v += xq[0] * (flt1[i * flt1_stride + j] - u); - if (params->r2 > 0) v += xq[1] * (flt2[i * flt2_stride + j] - u); + if (params->r0 > 0) v += xq[0] * (flt0[i * flt0_stride + j] - u); + if (params->r1 > 0) v += xq[1] * (flt1[i * flt1_stride + j] - u); #else // CONFIG_SKIP_SGR - const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u; - const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u; + const int32_t f1 = (int32_t)flt0[i * flt0_stride + j] - u; + const int32_t f2 = (int32_t)flt1[i * flt1_stride + j] - u; const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); #endif // CONFIG_SKIP_SGR const int32_t e = @@ -244,8 +244,8 @@ #define 
USE_SGRPROJ_REFINEMENT_SEARCH 1 static int64_t finer_search_pixel_proj_error( const uint8_t *src8, int width, int height, int src_stride, - const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1, - int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd + const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt0, + int flt0_stride, int32_t *flt1, int flt1_stride, int start_step, int *xqd #if CONFIG_SKIP_SGR , const sgr_params_type *params @@ -253,12 +253,12 @@ ) { #if CONFIG_SKIP_SGR int64_t err = get_pixel_proj_error( - src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd, params); + src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd, params); #else // CONFIG_SKIP_SGR int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd); #endif // CONFIG_SKIP_SGR (void)start_step; #if USE_SGRPROJ_REFINEMENT_SEARCH @@ -268,7 +268,7 @@ for (int s = start_step; s >= 1; s >>= 1) { for (int p = 0; p < 2; ++p) { #if CONFIG_SKIP_SGR - if ((params->r1 == 0 && p == 0) || (params->r2 == 0 && p == 1)) continue; + if ((params->r0 == 0 && p == 0) || (params->r1 == 0 && p == 1)) continue; #endif int skip = 0; do { @@ -277,12 +277,12 @@ #if CONFIG_SKIP_SGR err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd, params); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd, params); #else // CONFIG_SKIP_SGR err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd); #endif // 
CONFIG_SKIP_SGR if (err2 > err) { xqd[p] += s; @@ -302,12 +302,12 @@ #if CONFIG_SKIP_SGR err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd, params); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd, params); #else // CONFIG_SKIP_SGR err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd); #endif // CONFIG_SKIP_SGR if (err2 > err) { xqd[p] -= s; @@ -328,8 +328,8 @@ static void get_proj_subspace(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int use_highbitdepth, - int32_t *flt1, int flt1_stride, int32_t *flt2, - int flt2_stride, int *xq + int32_t *flt0, int flt0_stride, int32_t *flt1, + int flt1_stride, int *xq #if CONFIG_SKIP_SGR , const sgr_params_type *params @@ -357,12 +357,12 @@ (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u; #if CONFIG_SKIP_SGR const double f1 = - (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; + (params->r0 > 0) ? (double)flt0[i * flt0_stride + j] - u : 0; const double f2 = - (params->r2 > 0) ? (double)flt2[i * flt2_stride + j] - u : 0; + (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; #else // CONFIG_SKIP_SGR - const double f1 = (double)flt1[i * flt1_stride + j] - u; - const double f2 = (double)flt2[i * flt2_stride + j] - u; + const double f1 = (double)flt0[i * flt0_stride + j] - u; + const double f2 = (double)flt1[i * flt1_stride + j] - u; #endif // CONFIG_SKIP_SGR H[0][0] += f1 * f1; H[1][1] += f2 * f2; @@ -381,12 +381,12 @@ (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u; #if CONFIG_SKIP_SGR const double f1 = - (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; + (params->r0 > 0) ? 
(double)flt0[i * flt0_stride + j] - u : 0; const double f2 = - (params->r2 > 0) ? (double)flt2[i * flt2_stride + j] - u : 0; + (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; #else // CONFIG_SKIP_SGR - const double f1 = (double)flt1[i * flt1_stride + j] - u; - const double f2 = (double)flt2[i * flt2_stride + j] - u; + const double f1 = (double)flt0[i * flt0_stride + j] - u; + const double f2 = (double)flt1[i * flt1_stride + j] - u; #endif // CONFIG_SKIP_SGR H[0][0] += f1 * f1; H[1][1] += f2 * f2; @@ -403,7 +403,7 @@ C[0] /= size; C[1] /= size; #if CONFIG_SKIP_SGR - if (params->r1 == 0) { + if (params->r0 == 0) { // H matrix is now only the scalar H[1][1] // C vector is now only the scalar C[1] Det = H[1][1]; @@ -413,7 +413,7 @@ xq[0] = 0; xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS)); - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { // H matrix is now only the scalar H[0][0] // C vector is now only the scalar C[0] Det = H[0][0]; @@ -444,11 +444,11 @@ #if CONFIG_SKIP_SGR void encode_xq(int *xq, int *xqd, const sgr_params_type *params) { - if (params->r1 == 0) { + if (params->r0 == 0) { xqd[0] = 0; xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xq[1], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1); - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0); xqd[1] = 0; } else { @@ -469,19 +469,19 @@ // Apply the self-guided filter across an entire restoration unit. 
static void apply_sgr(const sgr_params_type *params, const uint8_t *dat8, int width, int height, int dat_stride, int use_highbd, - int bit_depth, int pu_width, int pu_height, int32_t *flt1, - int32_t *flt2, int flt_stride) { + int bit_depth, int pu_width, int pu_height, int32_t *flt0, + int32_t *flt1, int flt_stride) { for (int i = 0; i < height; i += pu_height) { const int h = AOMMIN(pu_height, height - i); + int32_t *flt0_row = flt0 + i * flt_stride; int32_t *flt1_row = flt1 + i * flt_stride; - int32_t *flt2_row = flt2 + i * flt_stride; const uint8_t *dat8_row = dat8 + i * dat_stride; // Iterate over the stripe in blocks of width pu_width for (int j = 0; j < width; j += pu_width) { const int w = AOMMIN(pu_width, width - j); - av1_selfguided_restoration(dat8_row + j, w, h, dat_stride, flt1_row + j, - flt2_row + j, flt_stride, params, bit_depth, + av1_selfguided_restoration(dat8_row + j, w, h, dat_stride, flt0_row + j, + flt1_row + j, flt_stride, params, bit_depth, use_highbd); } } @@ -491,8 +491,8 @@ const uint8_t *dat8, int width, int height, int dat_stride, const uint8_t *src8, int src_stride, int use_highbitdepth, int bit_depth, int pu_width, int pu_height, int32_t *rstbuf) { - int32_t *flt1 = rstbuf; - int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt0 = rstbuf; + int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX; int ep, bestep = 0; int64_t besterr = -1; int exqd[2], bestxqd[2] = { 0, 0 }; @@ -507,15 +507,15 @@ int exq[2]; apply_sgr(params, dat8, width, height, dat_stride, use_highbitdepth, - bit_depth, pu_width, pu_height, flt1, flt2, flt_stride); + bit_depth, pu_width, pu_height, flt0, flt1, flt_stride); aom_clear_system_state(); #if CONFIG_SKIP_SGR get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride, - use_highbitdepth, flt1, flt_stride, flt2, flt_stride, exq, + use_highbitdepth, flt0, flt_stride, flt1, flt_stride, exq, params); #else // CONFIG_SKIP_SGR get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride, - 
use_highbitdepth, flt1, flt_stride, flt2, flt_stride, + use_highbitdepth, flt0, flt_stride, flt1, flt_stride, exq); #endif // CONFIG_SKIP_SGR aom_clear_system_state(); @@ -523,12 +523,12 @@ encode_xq(exq, exqd, params); int64_t err = finer_search_pixel_proj_error( src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, - flt1, flt_stride, flt2, flt_stride, 2, exqd, params); + flt0, flt_stride, flt1, flt_stride, 2, exqd, params); #else // CONFIG_SKIP_SGR encode_xq(exq, exqd); int64_t err = finer_search_pixel_proj_error( src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, - flt1, flt_stride, flt2, flt_stride, 2, exqd); + flt0, flt_stride, flt1, flt_stride, 2, exqd); #endif // CONFIG_SKIP_SGR if (besterr == -1 || err < besterr) { bestep = ep; @@ -550,12 +550,12 @@ int bits = SGRPROJ_PARAMS_BITS; #if CONFIG_SKIP_SGR const sgr_params_type *params = &sgr_params[sgrproj_info->ep]; - if (params->r1 > 0) + if (params->r0 > 0) bits += aom_count_primitive_refsubexpfin( SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0); - if (params->r2 > 0) + if (params->r1 > 0) bits += aom_count_primitive_refsubexpfin( SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,