Add encoder/bitstream support for SKIP_SGR

The encoder can now make use of SGR filters with r == 0 when
CONFIG_SKIP_SGR == 1. If r == 0 for one of the two filters (r1 or r2),
no blending coefficient for that filter is written to or read from the
bitstream.

Change-Id: I8496b87a7fa7b29f5ee9e7687bd117f93e90e649
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index e3a3ea7..e861243 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -29,12 +29,17 @@
 const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
 // r1, eps1, r2, eps2
 #if CONFIG_SKIP_SGR
-// Setting r = 0 skips the filter
-#endif  // CONFIG_SKIP_SGR
+  // Setting r = 0 skips the filter
   { 2, 12, 1, 4 },  { 2, 15, 1, 6 },  { 2, 18, 1, 8 },  { 2, 20, 1, 9 },
   { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
   { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 2, 30, 1, 6 },
   { 2, 50, 1, 12 }, { 2, 60, 1, 13 }, { 2, 70, 1, 14 }, { 2, 80, 1, 15 },
+#else   // CONFIG_SKIP_SGR
+  { 2, 12, 1, 4 },  { 2, 15, 1, 6 },  { 2, 18, 1, 8 },  { 2, 20, 1, 9 },
+  { 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
+  { 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 2, 30, 1, 6 },
+  { 2, 50, 1, 12 }, { 2, 60, 1, 13 }, { 2, 70, 1, 14 }, { 2, 80, 1, 15 },
+#endif  // CONFIG_SKIP_SGR
 };
 
 // Count horizontal or vertical units per tile (use a width or height for
@@ -740,10 +745,27 @@
     assert(0 && "Invalid value of r in self-guided filter");
 }
 
+#if CONFIG_SKIP_SGR
+void decode_xq(const int *xqd, int *xq, const sgr_params_type *params) {
+  if (params->r1 == 0) {
+    assert(xqd[0] == 0);
+    xq[0] = 0;
+    xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1];
+  } else if (params->r2 == 0) {
+    assert(xqd[1] == 0);
+    xq[0] = xqd[0];
+    xq[1] = 0;
+  } else {
+    xq[0] = xqd[0];
+    xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
+  }
+}
+#else   // CONFIG_SKIP_SGR
 void decode_xq(const int *xqd, int *xq) {
   xq[0] = xqd[0];
   xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
 }
+#endif  // CONFIG_SKIP_SGR
 
 const int32_t x_by_xplus1[256] = {
   // Special case: Map 0 -> 1 (corresponding to a value of 1/256)
@@ -1059,21 +1081,14 @@
   assert(!(params->r1 == 0 && params->r2 == 0));
 
 #if CONFIG_FAST_SGR
-  if (params->r1 > 0) {
-    // r == 2 filter
-    assert(params->r1 == 2);
+  if (params->r1 > 0)
     av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
                                              flt1, flt_stride, bit_depth,
                                              params->r1, params->e1);
-  }
-
-  if (params->r2 > 0) {
-    // r == 1 filter
-    assert(params->r2 == 1);
+  if (params->r2 > 0)
     av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
                                         flt2, flt_stride, bit_depth, params->r2,
                                         params->e2);
-  }
 #else
   if (params->r1 > 0)
     av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
@@ -1111,7 +1126,6 @@
                                     uint8_t *dst8, int dst_stride,
                                     int32_t *tmpbuf, int bit_depth,
                                     int highbd) {
-  int xq[2];
   int32_t *flt1 = tmpbuf;
   int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
   assert(width * height <= RESTORATION_TILEPELS_MAX);
@@ -1120,11 +1134,14 @@
   const sgr_params_type *params = &sgr_params[eps];
   av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width,
                                params, bit_depth, highbd);
+  int xq[2];
+  decode_xq(xqd, xq, params);
 #else   // CONFIG_SKIP_SGR
   av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width,
                                &sgr_params[eps], bit_depth, highbd);
-#endif  // CONFIG_SKIP_SGR
+  int xq[2];
   decode_xq(xqd, xq);
+#endif  // CONFIG_SKIP_SGR
   for (int i = 0; i < height; ++i) {
     for (int j = 0; j < width; ++j) {
       const int k = i * width + j;
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index 8734754..520cfa5 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -273,7 +273,11 @@
 
 void extend_frame(uint8_t *data, int width, int height, int stride,
                   int border_horz, int border_vert, int highbd);
+#if CONFIG_SKIP_SGR
+void decode_xq(const int *xqd, int *xq, const sgr_params_type *params);
+#else   // CONFIG_SKIP_SGR
 void decode_xq(const int *xqd, int *xq);
+#endif  // CONFIG_SKIP_SGR
 
 // Filter a single loop restoration unit.
 //
diff --git a/av1/common/x86/selfguided_avx2.c b/av1/common/x86/selfguided_avx2.c
index db82db4..bc82bec 100644
--- a/av1/common/x86/selfguided_avx2.c
+++ b/av1/common/x86/selfguided_avx2.c
@@ -594,8 +594,6 @@
   assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
 
   if (params->r1 > 0) {
-    // r == 2 filter
-    assert(params->r1 == 2);
     calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
                  params->r1);
     final_filter_fast(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
@@ -603,8 +601,6 @@
   }
 
   if (params->r2 > 0) {
-    // r == 1 filter
-    assert(params->r2 == 1);
     calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth,
             params->r2);
     final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
@@ -670,13 +666,14 @@
   const sgr_params_type *params = &sgr_params[eps];
   av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2,
                                   width, params, bit_depth, highbd);
+  int xq[2];
+  decode_xq(xqd, xq, params);
 #else   // CONFIG_SKIP_SGR
   av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2,
                                   width, &sgr_params[eps], bit_depth, highbd);
-#endif  // CONFIG_SKIP_SGR
-
   int xq[2];
   decode_xq(xqd, xq);
+#endif  // CONFIG_SKIP_SGR
 
   __m256i xq0 = _mm256_set1_epi32(xq[0]);
   __m256i xq1 = _mm256_set1_epi32(xq[1]);
diff --git a/av1/common/x86/selfguided_sse4.c b/av1/common/x86/selfguided_sse4.c
index 7f94fd9..8cbb84f 100644
--- a/av1/common/x86/selfguided_sse4.c
+++ b/av1/common/x86/selfguided_sse4.c
@@ -549,8 +549,6 @@
   assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
 
   if (params->r1 > 0) {
-    // r == 2 filter
-    assert(params->r1 == 2);
     calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
                  params->r1);
     final_filter_fast2(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
@@ -558,8 +556,6 @@
   }
 
   if (params->r2 > 0) {
-    // r == 1 filter
-    assert(params->r2 == 1);
     calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth,
             params->r2);
     final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
@@ -625,13 +621,14 @@
   const sgr_params_type *params = &sgr_params[eps];
   av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2,
                                     width, params, bit_depth, highbd);
+  int xq[2];
+  decode_xq(xqd, xq, params);
 #else   // CONFIG_SKIP_SGR
   av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2,
                                     width, &sgr_params[eps], bit_depth, highbd);
-#endif  // CONFIG_SKIP_SGR
-
   int xq[2];
   decode_xq(xqd, xq);
+#endif  // CONFIG_SKIP_SGR
 
   __m128i xq0 = _mm_set1_epi32(xq[0]);
   __m128i xq1 = _mm_set1_epi32(xq[1]);
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index cf7a1fd..d3fa2ff 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -911,6 +911,42 @@
   memcpy(ref_wiener_info, wiener_info, sizeof(*wiener_info));
 }
 
+#if CONFIG_SKIP_SGR
+static void read_sgrproj_filter(SgrprojInfo *sgrproj_info,
+                                SgrprojInfo *ref_sgrproj_info, aom_reader *rb) {
+  sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR);
+  const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
+
+  if (params->r1 == 0) {
+    sgrproj_info->xqd[0] = 0;
+    sgrproj_info->xqd[1] =
+        aom_read_primitive_refsubexpfin(
+            rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
+            ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) +
+        SGRPROJ_PRJ_MIN1;
+  } else if (params->r2 == 0) {
+    sgrproj_info->xqd[0] =
+        aom_read_primitive_refsubexpfin(
+            rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
+            ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_STR) +
+        SGRPROJ_PRJ_MIN0;
+    sgrproj_info->xqd[1] = 0;
+  } else {
+    sgrproj_info->xqd[0] =
+        aom_read_primitive_refsubexpfin(
+            rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
+            ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_STR) +
+        SGRPROJ_PRJ_MIN0;
+    sgrproj_info->xqd[1] =
+        aom_read_primitive_refsubexpfin(
+            rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
+            ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) +
+        SGRPROJ_PRJ_MIN1;
+  }
+
+  memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
+}
+#else   // CONFIG_SKIP_SGR
 static void read_sgrproj_filter(SgrprojInfo *sgrproj_info,
                                 SgrprojInfo *ref_sgrproj_info, aom_reader *rb) {
   sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR);
@@ -926,6 +962,7 @@
       SGRPROJ_PRJ_MIN1;
   memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
 }
+#endif  // CONFIG_SKIP_SGR
 
 static void loop_restoration_read_sb_coeffs(const AV1_COMMON *const cm,
                                             MACROBLOCKD *xd,
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 2693b26..21e23ea 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2187,6 +2187,39 @@
   memcpy(ref_wiener_info, wiener_info, sizeof(*wiener_info));
 }
 
+#if CONFIG_SKIP_SGR
+static void write_sgrproj_filter(const SgrprojInfo *sgrproj_info,
+                                 SgrprojInfo *ref_sgrproj_info,
+                                 aom_writer *wb) {
+  aom_write_literal(wb, sgrproj_info->ep, SGRPROJ_PARAMS_BITS);
+  const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
+
+  if (params->r1 == 0) {
+    assert(sgrproj_info->xqd[0] == 0);
+    aom_write_primitive_refsubexpfin(
+        wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
+        ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
+        sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
+  } else if (params->r2 == 0) {
+    aom_write_primitive_refsubexpfin(
+        wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
+        ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
+        sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
+    assert(sgrproj_info->xqd[1] == 0);
+  } else {
+    aom_write_primitive_refsubexpfin(
+        wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
+        ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
+        sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
+    aom_write_primitive_refsubexpfin(
+        wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
+        ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
+        sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
+  }
+
+  memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
+}
+#else   // CONFIG_SKIP_SGR
 static void write_sgrproj_filter(const SgrprojInfo *sgrproj_info,
                                  SgrprojInfo *ref_sgrproj_info,
                                  aom_writer *wb) {
@@ -2201,6 +2234,7 @@
                                    sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
   memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info));
 }
+#endif  // CONFIG_SKIP_SGR
 
 static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm,
                                              MACROBLOCKD *xd,
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 8491806..5d1d269 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -179,11 +179,20 @@
                                     int src_stride, const uint8_t *dat8,
                                     int dat_stride, int use_highbitdepth,
                                     int32_t *flt1, int flt1_stride,
-                                    int32_t *flt2, int flt2_stride, int *xqd) {
+                                    int32_t *flt2, int flt2_stride, int *xqd
+#if CONFIG_SKIP_SGR
+                                    ,
+                                    const sgr_params_type *params
+#endif  // CONFIG_SKIP_SGR
+) {
   int i, j;
   int64_t err = 0;
   int xq[2];
+#if CONFIG_SKIP_SGR
+  decode_xq(xqd, xq, params);
+#else   // CONFIG_SKIP_SGR
   decode_xq(xqd, xq);
+#endif  // CONFIG_SKIP_SGR
   if (!use_highbitdepth) {
     const uint8_t *src = src8;
     const uint8_t *dat = dat8;
@@ -191,9 +200,15 @@
       for (j = 0; j < width; ++j) {
         const int32_t u =
             (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
+#if CONFIG_SKIP_SGR
+        int32_t v = u << SGRPROJ_PRJ_BITS;
+        if (params->r1 > 0) v += xq[0] * (flt1[i * flt1_stride + j] - u);
+        if (params->r2 > 0) v += xq[1] * (flt2[i * flt2_stride + j] - u);
+#else   // CONFIG_SKIP_SGR
         const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
         const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
         const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
+#endif  // CONFIG_SKIP_SGR
         const int32_t e =
             ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
             src[i * src_stride + j];
@@ -207,9 +222,15 @@
       for (j = 0; j < width; ++j) {
         const int32_t u =
             (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
+#if CONFIG_SKIP_SGR
+        int32_t v = u << SGRPROJ_PRJ_BITS;
+        if (params->r1 > 0) v += xq[0] * (flt1[i * flt1_stride + j] - u);
+        if (params->r2 > 0) v += xq[1] * (flt2[i * flt2_stride + j] - u);
+#else   // CONFIG_SKIP_SGR
         const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
         const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
         const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
+#endif  // CONFIG_SKIP_SGR
         const int32_t e =
             ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
             src[i * src_stride + j];
@@ -224,10 +245,21 @@
 static int64_t finer_search_pixel_proj_error(
     const uint8_t *src8, int width, int height, int src_stride,
     const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1,
-    int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd) {
+    int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd
+#if CONFIG_SKIP_SGR
+    ,
+    const sgr_params_type *params
+#endif  // CONFIG_SKIP_SGR
+) {
+#if CONFIG_SKIP_SGR
+  int64_t err = get_pixel_proj_error(
+      src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt1,
+      flt1_stride, flt2, flt2_stride, xqd, params);
+#else   // CONFIG_SKIP_SGR
   int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8,
                                      dat_stride, use_highbitdepth, flt1,
                                      flt1_stride, flt2, flt2_stride, xqd);
+#endif  // CONFIG_SKIP_SGR
   (void)start_step;
 #if USE_SGRPROJ_REFINEMENT_SEARCH
   int64_t err2;
@@ -235,13 +267,23 @@
   int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 };
   for (int s = start_step; s >= 1; s >>= 1) {
     for (int p = 0; p < 2; ++p) {
+#if CONFIG_SKIP_SGR
+      if ((params->r1 == 0 && p == 0) || (params->r2 == 0 && p == 1)) continue;
+#endif  // CONFIG_SKIP_SGR
       int skip = 0;
       do {
         if (xqd[p] - s >= tap_min[p]) {
           xqd[p] -= s;
+#if CONFIG_SKIP_SGR
+          err2 =
+              get_pixel_proj_error(src8, width, height, src_stride, dat8,
+                                   dat_stride, use_highbitdepth, flt1,
+                                   flt1_stride, flt2, flt2_stride, xqd, params);
+#else   // CONFIG_SKIP_SGR
           err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
                                       dat_stride, use_highbitdepth, flt1,
                                       flt1_stride, flt2, flt2_stride, xqd);
+#endif  // CONFIG_SKIP_SGR
           if (err2 > err) {
             xqd[p] += s;
           } else {
@@ -257,9 +299,16 @@
       do {
         if (xqd[p] + s <= tap_max[p]) {
           xqd[p] += s;
+#if CONFIG_SKIP_SGR
+          err2 =
+              get_pixel_proj_error(src8, width, height, src_stride, dat8,
+                                   dat_stride, use_highbitdepth, flt1,
+                                   flt1_stride, flt2, flt2_stride, xqd, params);
+#else   // CONFIG_SKIP_SGR
           err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8,
                                       dat_stride, use_highbitdepth, flt1,
                                       flt1_stride, flt2, flt2_stride, xqd);
+#endif  // CONFIG_SKIP_SGR
           if (err2 > err) {
             xqd[p] -= s;
           } else {
@@ -280,7 +329,12 @@
                               int src_stride, const uint8_t *dat8,
                               int dat_stride, int use_highbitdepth,
                               int32_t *flt1, int flt1_stride, int32_t *flt2,
-                              int flt2_stride, int *xq) {
+                              int flt2_stride, int *xq
+#if CONFIG_SKIP_SGR
+                              ,
+                              const sgr_params_type *params
+#endif  // CONFIG_SKIP_SGR
+) {
   int i, j;
   double H[2][2] = { { 0, 0 }, { 0, 0 } };
   double C[2] = { 0, 0 };
@@ -301,8 +355,15 @@
         const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
         const double s =
             (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
+#if CONFIG_SKIP_SGR
+        const double f1 =
+            (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0;
+        const double f2 =
+            (params->r2 > 0) ? (double)flt2[i * flt2_stride + j] - u : 0;
+#else   // CONFIG_SKIP_SGR
         const double f1 = (double)flt1[i * flt1_stride + j] - u;
         const double f2 = (double)flt2[i * flt2_stride + j] - u;
+#endif  // CONFIG_SKIP_SGR
         H[0][0] += f1 * f1;
         H[1][1] += f2 * f2;
         H[0][1] += f1 * f2;
@@ -318,8 +379,15 @@
         const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
         const double s =
             (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
+#if CONFIG_SKIP_SGR
+        const double f1 =
+            (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0;
+        const double f2 =
+            (params->r2 > 0) ? (double)flt2[i * flt2_stride + j] - u : 0;
+#else   // CONFIG_SKIP_SGR
         const double f1 = (double)flt1[i * flt1_stride + j] - u;
         const double f2 = (double)flt2[i * flt2_stride + j] - u;
+#endif  // CONFIG_SKIP_SGR
         H[0][0] += f1 * f1;
         H[1][1] += f2 * f2;
         H[0][1] += f1 * f2;
@@ -334,20 +402,69 @@
   H[1][0] = H[0][1];
   C[0] /= size;
   C[1] /= size;
+#if CONFIG_SKIP_SGR
+  if (params->r1 == 0) {
+    // H matrix is now only the scalar H[1][1]
+    // C vector is now only the scalar C[1]
+    Det = H[1][1];
+    if (Det < 1e-8) return;  // ill-posed, return default values
+    x[0] = 0;
+    x[1] = C[1] / Det;
+
+    xq[0] = 0;
+    xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
+  } else if (params->r2 == 0) {
+    // H matrix is now only the scalar H[0][0]
+    // C vector is now only the scalar C[0]
+    Det = H[0][0];
+    if (Det < 1e-8) return;  // ill-posed, return default values
+    x[0] = C[0] / Det;
+    x[1] = 0;
+
+    xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
+    xq[1] = 0;
+  } else {
+    Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
+    if (Det < 1e-8) return;  // ill-posed, return default values
+    x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
+    x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
+
+    xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
+    xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
+  }
+#else   // CONFIG_SKIP_SGR
   Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]);
   if (Det < 1e-8) return;  // ill-posed, return default values
   x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det;
   x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det;
   xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS));
   xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS));
+#endif  // CONFIG_SKIP_SGR
 }
 
+#if CONFIG_SKIP_SGR
+void encode_xq(int *xq, int *xqd, const sgr_params_type *params) {
+  if (params->r1 == 0) {
+    xqd[0] = 0;
+    xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xq[1], SGRPROJ_PRJ_MIN1,
+                   SGRPROJ_PRJ_MAX1);
+  } else if (params->r2 == 0) {
+    xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
+    xqd[1] = 0;
+  } else {
+    xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
+    xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1], SGRPROJ_PRJ_MIN1,
+                   SGRPROJ_PRJ_MAX1);
+  }
+}
+#else   // CONFIG_SKIP_SGR
 void encode_xq(int *xq, int *xqd) {
   xqd[0] = xq[0];
   xqd[0] = clamp(xqd[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
   xqd[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1];
   xqd[1] = clamp(xqd[1], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1);
 }
+#endif  // CONFIG_SKIP_SGR
 
 // Apply the self-guided filter across an entire restoration unit.
 static void apply_sgr(const sgr_params_type *params, const uint8_t *dat8,
@@ -386,19 +503,33 @@
          pu_height == RESTORATION_PROC_UNIT_SIZE);
 
   for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
+    const sgr_params_type *params = &sgr_params[ep];
     int exq[2];
-    apply_sgr(&sgr_params[ep], dat8, width, height, dat_stride,
-              use_highbitdepth, bit_depth, pu_width, pu_height, flt1, flt2,
-              flt_stride);
+
+    apply_sgr(params, dat8, width, height, dat_stride, use_highbitdepth,
+              bit_depth, pu_width, pu_height, flt1, flt2, flt_stride);
     aom_clear_system_state();
+#if CONFIG_SKIP_SGR
+    get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
+                      use_highbitdepth, flt1, flt_stride, flt2, flt_stride, exq,
+                      params);
+#else   // CONFIG_SKIP_SGR
     get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
                       use_highbitdepth, flt1, flt_stride, flt2, flt_stride,
                       exq);
+#endif  // CONFIG_SKIP_SGR
     aom_clear_system_state();
+#if CONFIG_SKIP_SGR
+    encode_xq(exq, exqd, params);
+    int64_t err = finer_search_pixel_proj_error(
+        src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
+        flt1, flt_stride, flt2, flt_stride, 2, exqd, params);
+#else   // CONFIG_SKIP_SGR
     encode_xq(exq, exqd);
     int64_t err = finer_search_pixel_proj_error(
         src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth,
         flt1, flt_stride, flt2, flt_stride, 2, exqd);
+#endif  // CONFIG_SKIP_SGR
     if (besterr == -1 || err < besterr) {
       bestep = ep;
       besterr = err;
@@ -417,6 +548,19 @@
 static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
                               SgrprojInfo *ref_sgrproj_info) {
   int bits = SGRPROJ_PARAMS_BITS;
+#if CONFIG_SKIP_SGR
+  const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
+  if (params->r1 > 0)
+    bits += aom_count_primitive_refsubexpfin(
+        SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
+        ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
+        sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0);
+  if (params->r2 > 0)
+    bits += aom_count_primitive_refsubexpfin(
+        SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
+        ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
+        sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
+#else   // CONFIG_SKIP_SGR
   bits += aom_count_primitive_refsubexpfin(
       SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
       ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
@@ -425,6 +569,7 @@
       SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
       ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
       sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
+#endif  // CONFIG_SKIP_SGR
   return bits;
 }