Clean up SGR code and make consistent

Change-Id: I99c0cd287d154acc5063c92eb3ad4035bff8dad7
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index 1fca124..f308fef 100755 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl
@@ -466,7 +466,7 @@ add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd"; specialize qw/apply_selfguided_restoration sse4_1 avx2/; - add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *flt1, int32_t *flt2, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd"; + add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *flt0, int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd"; specialize qw/av1_selfguided_restoration sse4_1 avx2/; }
diff --git a/av1/common/restoration.c b/av1/common/restoration.c index e861243..fac9ba8 100644 --- a/av1/common/restoration.c +++ b/av1/common/restoration.c
@@ -747,11 +747,11 @@ #if CONFIG_SKIP_SGR void decode_xq(const int *xqd, int *xq, const sgr_params_type *params) { - if (params->r1 == 0) { + if (params->r0 == 0) { assert(xqd[0] == 0); xq[0] = 0; xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1]; - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { assert(xqd[1] == 0); xq[0] = xqd[0]; xq[1] = 0; @@ -1051,7 +1051,7 @@ } void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height, - int dgd_stride, int32_t *flt1, int32_t *flt2, + int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd) { int32_t dgd32_[RESTORATION_PROC_UNIT_PELS]; @@ -1078,45 +1078,45 @@ // If params->r == 0 we skip the corresponding filter. We only allow one of // the radii to be 0, as having both equal to 0 would be equivalent to // skipping SGR entirely. - assert(!(params->r1 == 0 && params->r2 == 0)); + assert(!(params->r0 == 0 && params->r1 == 0)); #if CONFIG_FAST_SGR - if (params->r1 > 0) + if (params->r0 > 0) av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, - flt1, flt_stride, bit_depth, - params->r1, params->e1); - if (params->r2 > 0) - av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, - flt2, flt_stride, bit_depth, params->r2, - params->e2); -#else + flt0, flt_stride, bit_depth, + params->r0, params->e0); if (params->r1 > 0) av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, flt_stride, bit_depth, params->r1, params->e1); - - if (params->r2 > 0) +#else + if (params->r0 > 0) av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, - flt2, flt_stride, bit_depth, params->r2, - params->e2); + flt0, flt_stride, bit_depth, params->r0, + params->e0); + + if (params->r1 > 0) + av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, + flt1, flt_stride, bit_depth, params->r1, + params->e1); #endif // CONFIG_FAST_SGR #else // CONFIG_SKIP_SGR #if CONFIG_FAST_SGR // 
r == 2 filter av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, - flt1, flt_stride, bit_depth, - params->r1, params->e1); + flt0, flt_stride, bit_depth, + params->r0, params->e0); // r == 1 filter - av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2, - flt_stride, bit_depth, params->r2, - params->e2); -#else av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, flt_stride, bit_depth, params->r1, params->e1); - av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2, - flt_stride, bit_depth, params->r2, - params->e2); +#else + av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt0, + flt_stride, bit_depth, params->r0, + params->e0); + av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, + flt_stride, bit_depth, params->r1, + params->e1); #endif // CONFIG_FAST_SGR #endif // CONFIG_SKIP_SGR } @@ -1126,18 +1126,18 @@ uint8_t *dst8, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd) { - int32_t *flt1 = tmpbuf; - int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt0 = tmpbuf; + int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX; assert(width * height <= RESTORATION_TILEPELS_MAX); #if CONFIG_SKIP_SGR const sgr_params_type *params = &sgr_params[eps]; - av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width, + av1_selfguided_restoration_c(dat8, width, height, stride, flt0, flt1, width, params, bit_depth, highbd); int xq[2]; decode_xq(xqd, xq, params); #else // CONFIG_SKIP_SGR - av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width, + av1_selfguided_restoration_c(dat8, width, height, stride, flt0, flt1, width, &sgr_params[eps], bit_depth, highbd); int xq[2]; decode_xq(xqd, xq); @@ -1154,11 +1154,11 @@ int32_t v = u << SGRPROJ_PRJ_BITS; // If params->r == 0 then we skipped the filtering in // av1_selfguided_restoration_c, i.e. 
flt[k] == u - if (params->r1 > 0) v += xq[0] * (flt1[k] - u); - if (params->r2 > 0) v += xq[1] * (flt2[k] - u); + if (params->r0 > 0) v += xq[0] * (flt0[k] - u); + if (params->r1 > 0) v += xq[1] * (flt1[k] - u); #else // CONFIG_SKIP_SGR - const int32_t f1 = flt1[k] - u; - const int32_t f2 = flt2[k] - u; + const int32_t f1 = flt0[k] - u; + const int32_t f2 = flt1[k] - u; const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); #endif // CONFIG_SKIP_SGR const int16_t w =
diff --git a/av1/common/restoration.h b/av1/common/restoration.h index 520cfa5..e3a047e 100644 --- a/av1/common/restoration.h +++ b/av1/common/restoration.h
@@ -176,10 +176,10 @@ #endif typedef struct { + int r0; + int e0; int r1; int e1; - int r2; - int e2; } sgr_params_type; typedef struct {
diff --git a/av1/common/x86/selfguided_avx2.c b/av1/common/x86/selfguided_avx2.c index bc82bec..17be157 100644 --- a/av1/common/x86/selfguided_avx2.c +++ b/av1/common/x86/selfguided_avx2.c
@@ -526,8 +526,8 @@ #endif void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height, - int dgd_stride, int32_t *flt1, - int32_t *flt2, int flt_stride, + int dgd_stride, int32_t *flt0, + int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd) { // The ALIGN_POWER_OF_TWO macro here ensures that column 1 of Atl, Btl, @@ -583,36 +583,36 @@ integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl, buf_stride); -// Write to flt1 and flt2 +// Write to flt0 and flt1 #if CONFIG_SKIP_SGR // If params->r == 0 we skip the corresponding filter. We only allow one of // the radii to be 0, as having both equal to 0 would be equivalent to // skipping SGR entirely. - assert(!(params->r1 == 0 && params->r2 == 0)); + assert(!(params->r0 == 0 && params->r1 == 0)); #if CONFIG_FAST_SGR + assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - if (params->r1 > 0) { - calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, - params->r1); - final_filter_fast(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, + if (params->r0 > 0) { + calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth, + params->r0); + final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); } - if (params->r2 > 0) { - calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth, - params->r2); - final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + if (params->r1 > 0) { + calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, + params->r1); + final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); } #else // CONFIG_FAST_SGR for (int i = 0; i < 2; ++i) { - int r = i ? params->r2 : params->r1; - int e = i ? 
params->e2 : params->e1; + int r = i ? params->r1 : params->r0; + int e = i ? params->e1 : params->e0; if (r == 0) continue; - int32_t *flt = i ? flt2 : flt1; + int32_t *flt = i ? flt1 : flt0; assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); @@ -623,26 +623,26 @@ #endif // CONFIG_FAST_SGR #else // CONFIG_SKIP_SGR #if CONFIG_FAST_SGR - assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); + assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); // r == 2 filter - assert(params->r1 == 2); - calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, - params->r1); - final_filter_fast(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + assert(params->r0 == 2); + calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth, + params->r0); + final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); // r == 1 filter - assert(params->r2 == 1); - calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth, - params->r2); - final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + assert(params->r1 == 1); + calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, + params->r1); + final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); #else // CONFIG_FAST_SGR for (int i = 0; i < 2; ++i) { - int r = i ? params->r2 : params->r1; - int e = i ? params->e2 : params->e1; - int32_t *flt = i ? flt2 : flt1; + int r = i ? params->r1 : params->r0; + int e = i ? params->e1 : params->e0; + int32_t *flt = i ? 
flt1 : flt0; assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); @@ -659,17 +659,17 @@ const int *xqd, uint8_t *dst8, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd) { - int32_t *flt1 = tmpbuf; - int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt0 = tmpbuf; + int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX; assert(width * height <= RESTORATION_TILEPELS_MAX); #if CONFIG_SKIP_SGR const sgr_params_type *params = &sgr_params[eps]; - av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2, + av1_selfguided_restoration_avx2(dat8, width, height, stride, flt0, flt1, width, params, bit_depth, highbd); int xq[2]; decode_xq(xqd, xq, params); #else // CONFIG_SKIP_SGR - av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2, + av1_selfguided_restoration_avx2(dat8, width, height, stride, flt0, flt1, width, &sgr_params[eps], bit_depth, highbd); int xq[2]; decode_xq(xqd, xq); @@ -705,27 +705,27 @@ __m256i v_0 = _mm256_slli_epi32(u_0, SGRPROJ_PRJ_BITS); __m256i v_1 = _mm256_slli_epi32(u_1, SGRPROJ_PRJ_BITS); - if (params->r1 > 0) { - const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); + if (params->r0 > 0) { + const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0); v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq0, f1_0)); - const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); + const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1); v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq0, f1_1)); } - if (params->r2 > 0) { - const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt2[k]), u_0); + if (params->r1 > 0) { + const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq1, f2_0)); - const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt2[k + 8]), u_1); + const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq1, f2_1)); } 
#else // CONFIG_SKIP_SGR - const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); - const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); + const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0); + const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1); - const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt2[k]), u_0); - const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt2[k + 8]), u_1); + const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0); + const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1); const __m256i v_0 = _mm256_add_epi32(_mm256_add_epi32(_mm256_mullo_epi32(xq0, f1_0),
diff --git a/av1/common/x86/selfguided_sse4.c b/av1/common/x86/selfguided_sse4.c index 8cbb84f..5042854 100644 --- a/av1/common/x86/selfguided_sse4.c +++ b/av1/common/x86/selfguided_sse4.c
@@ -431,10 +431,10 @@ // The final filter for the FAST_SGR self-guided restoration. Computes a // weighted average across A, B with "cross sums" (see cross_sum_... // implementations above). -static void final_filter_fast2(int32_t *dst, int dst_stride, const int32_t *A, - const int32_t *B, int buf_stride, - const void *dgd8, int dgd_stride, int width, - int height, int highbd) { +static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A, + const int32_t *B, int buf_stride, + const void *dgd8, int dgd_stride, int width, + int height, int highbd) { const int nb0 = 5; const int nb1 = 4; @@ -486,7 +486,7 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width, int height, int dgd_stride, - int32_t *flt1, int32_t *flt2, + int32_t *flt0, int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd) { @@ -538,36 +538,36 @@ integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl, buf_stride); -// Write to flt1 and flt2 +// Write to flt0 and flt1 #if CONFIG_SKIP_SGR // If params->r == 0 we skip the corresponding filter. We only allow one of // the radii to be 0, as having both equal to 0 would be equivalent to // skipping SGR entirely. 
- assert(!(params->r1 == 0 && params->r2 == 0)); + assert(!(params->r0 == 0 && params->r1 == 0)); #if CONFIG_FAST_SGR + assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); - if (params->r1 > 0) { - calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, - params->r1); - final_filter_fast2(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, - width, height, highbd); + if (params->r0 > 0) { + calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth, + params->r0); + final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, + width, height, highbd); } - if (params->r2 > 0) { - calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth, - params->r2); - final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + if (params->r1 > 0) { + calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, + params->r1); + final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); } #else // CONFIG_FAST_SGR for (int i = 0; i < 2; ++i) { - int r = i ? params->r2 : params->r1; - int e = i ? params->e2 : params->e1; + int r = i ? params->r1 : params->r0; + int e = i ? params->e1 : params->e0; if (r == 0) continue; - int32_t *flt = i ? flt2 : flt1; + int32_t *flt = i ? 
flt1 : flt0; assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); @@ -578,26 +578,26 @@ #endif // CONFIG_FAST_SGR #else // CONFIG_SKIP_SGR #if CONFIG_FAST_SGR - assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); + assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); // r == 2 filter - assert(params->r1 == 2); - calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, - params->r1); - final_filter_fast2(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, - width, height, highbd); + assert(params->r0 == 2); + calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth, + params->r0); + final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + height, highbd); // r == 1 filter - assert(params->r2 == 1); - calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth, - params->r2); - final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, + assert(params->r1 == 1); + calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth, + params->r1); + final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width, height, highbd); #else // CONFIG_FAST_SGR for (int i = 0; i < 2; ++i) { - int r = i ? params->r2 : params->r1; - int e = i ? params->e2 : params->e1; - int32_t *flt = i ? flt2 : flt1; + int r = i ? params->r1 : params->r0; + int e = i ? params->e1 : params->e0; + int32_t *flt = i ? 
flt1 : flt0; assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ)); @@ -614,17 +614,17 @@ const int *xqd, uint8_t *dst8, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd) { - int32_t *flt1 = tmpbuf; - int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt0 = tmpbuf; + int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX; assert(width * height <= RESTORATION_TILEPELS_MAX); #if CONFIG_SKIP_SGR const sgr_params_type *params = &sgr_params[eps]; - av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2, + av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt0, flt1, width, params, bit_depth, highbd); int xq[2]; decode_xq(xqd, xq, params); #else // CONFIG_SKIP_SGR - av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2, + av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt0, flt1, width, &sgr_params[eps], bit_depth, highbd); int xq[2]; decode_xq(xqd, xq); @@ -655,26 +655,26 @@ __m128i v_0 = _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS); __m128i v_1 = _mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS); - if (params->r1 > 0) { - const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); + if (params->r0 > 0) { + const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0); v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq0, f1_0)); - const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); + const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1); v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq0, f1_1)); } - if (params->r2 > 0) { - const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt2[k]), u_0); + if (params->r1 > 0) { + const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq1, f2_0)); - const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt2[k + 4]), u_1); + const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq1, f2_1)); } #else // CONFIG_SKIP_SGR - const __m128i f1_0 = 
_mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); - const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt2[k]), u_0); - const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); - const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt2[k + 4]), u_1); + const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0); + const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0); + const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1); + const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1); const __m128i v_0 = _mm_add_epi32( _mm_add_epi32(_mm_mullo_epi32(xq0, f1_0), _mm_mullo_epi32(xq1, f2_0)),
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index d3fa2ff..b546931 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c
@@ -917,14 +917,14 @@ sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR); const sgr_params_type *params = &sgr_params[sgrproj_info->ep]; - if (params->r1 == 0) { + if (params->r0 == 0) { sgrproj_info->xqd[0] = 0; sgrproj_info->xqd[1] = aom_read_primitive_refsubexpfin( rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) + SGRPROJ_PRJ_MIN1; - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { sgrproj_info->xqd[0] = aom_read_primitive_refsubexpfin( rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c index 21e23ea..99dd67a 100644 --- a/av1/encoder/bitstream.c +++ b/av1/encoder/bitstream.c
@@ -2194,13 +2194,13 @@ aom_write_literal(wb, sgrproj_info->ep, SGRPROJ_PARAMS_BITS); const sgr_params_type *params = &sgr_params[sgrproj_info->ep]; - if (params->r1 == 0) { + if (params->r0 == 0) { assert(sgrproj_info->xqd[0] == 0); aom_write_primitive_refsubexpfin( wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1); - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { aom_write_primitive_refsubexpfin( wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c index 5d1d269..13c40ba 100644 --- a/av1/encoder/pickrst.c +++ b/av1/encoder/pickrst.c
@@ -178,8 +178,8 @@ static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int use_highbitdepth, - int32_t *flt1, int flt1_stride, - int32_t *flt2, int flt2_stride, int *xqd + int32_t *flt0, int flt0_stride, + int32_t *flt1, int flt1_stride, int *xqd #if CONFIG_SKIP_SGR , const sgr_params_type *params @@ -202,11 +202,11 @@ (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS); #if CONFIG_SKIP_SGR int32_t v = u << SGRPROJ_PRJ_BITS; - if (params->r1 > 0) v += xq[0] * (flt1[i * flt1_stride + j] - u); - if (params->r2 > 0) v += xq[1] * (flt2[i * flt2_stride + j] - u); + if (params->r0 > 0) v += xq[0] * (flt0[i * flt0_stride + j] - u); + if (params->r1 > 0) v += xq[1] * (flt1[i * flt1_stride + j] - u); #else // CONFIG_SKIP_SGR - const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u; - const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u; + const int32_t f1 = (int32_t)flt0[i * flt0_stride + j] - u; + const int32_t f2 = (int32_t)flt1[i * flt1_stride + j] - u; const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); #endif // CONFIG_SKIP_SGR const int32_t e = @@ -224,11 +224,11 @@ (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS); #if CONFIG_SKIP_SGR int32_t v = u << SGRPROJ_PRJ_BITS; - if (params->r1 > 0) v += xq[0] * (flt1[i * flt1_stride + j] - u); - if (params->r2 > 0) v += xq[1] * (flt2[i * flt2_stride + j] - u); + if (params->r0 > 0) v += xq[0] * (flt0[i * flt0_stride + j] - u); + if (params->r1 > 0) v += xq[1] * (flt1[i * flt1_stride + j] - u); #else // CONFIG_SKIP_SGR - const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u; - const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u; + const int32_t f1 = (int32_t)flt0[i * flt0_stride + j] - u; + const int32_t f2 = (int32_t)flt1[i * flt1_stride + j] - u; const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); #endif // CONFIG_SKIP_SGR const int32_t e = @@ -244,8 +244,8 @@ #define 
USE_SGRPROJ_REFINEMENT_SEARCH 1 static int64_t finer_search_pixel_proj_error( const uint8_t *src8, int width, int height, int src_stride, - const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1, - int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd + const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt0, + int flt0_stride, int32_t *flt1, int flt1_stride, int start_step, int *xqd #if CONFIG_SKIP_SGR , const sgr_params_type *params @@ -253,12 +253,12 @@ ) { #if CONFIG_SKIP_SGR int64_t err = get_pixel_proj_error( - src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd, params); + src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd, params); #else // CONFIG_SKIP_SGR int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd); #endif // CONFIG_SKIP_SGR (void)start_step; #if USE_SGRPROJ_REFINEMENT_SEARCH @@ -268,7 +268,7 @@ for (int s = start_step; s >= 1; s >>= 1) { for (int p = 0; p < 2; ++p) { #if CONFIG_SKIP_SGR - if ((params->r1 == 0 && p == 0) || (params->r2 == 0 && p == 1)) continue; + if ((params->r0 == 0 && p == 0) || (params->r1 == 0 && p == 1)) continue; #endif int skip = 0; do { @@ -277,12 +277,12 @@ #if CONFIG_SKIP_SGR err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd, params); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd, params); #else // CONFIG_SKIP_SGR err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd); #endif // 
CONFIG_SKIP_SGR if (err2 > err) { xqd[p] += s; @@ -302,12 +302,12 @@ #if CONFIG_SKIP_SGR err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd, params); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd, params); #else // CONFIG_SKIP_SGR err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt1, - flt1_stride, flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt0, + flt0_stride, flt1, flt1_stride, xqd); #endif // CONFIG_SKIP_SGR if (err2 > err) { xqd[p] -= s; @@ -328,8 +328,8 @@ static void get_proj_subspace(const uint8_t *src8, int width, int height, int src_stride, const uint8_t *dat8, int dat_stride, int use_highbitdepth, - int32_t *flt1, int flt1_stride, int32_t *flt2, - int flt2_stride, int *xq + int32_t *flt0, int flt0_stride, int32_t *flt1, + int flt1_stride, int *xq #if CONFIG_SKIP_SGR , const sgr_params_type *params @@ -357,12 +357,12 @@ (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u; #if CONFIG_SKIP_SGR const double f1 = - (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; + (params->r0 > 0) ? (double)flt0[i * flt0_stride + j] - u : 0; const double f2 = - (params->r2 > 0) ? (double)flt2[i * flt2_stride + j] - u : 0; + (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; #else // CONFIG_SKIP_SGR - const double f1 = (double)flt1[i * flt1_stride + j] - u; - const double f2 = (double)flt2[i * flt2_stride + j] - u; + const double f1 = (double)flt0[i * flt0_stride + j] - u; + const double f2 = (double)flt1[i * flt1_stride + j] - u; #endif // CONFIG_SKIP_SGR H[0][0] += f1 * f1; H[1][1] += f2 * f2; @@ -381,12 +381,12 @@ (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u; #if CONFIG_SKIP_SGR const double f1 = - (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; + (params->r0 > 0) ? 
(double)flt0[i * flt0_stride + j] - u : 0; const double f2 = - (params->r2 > 0) ? (double)flt2[i * flt2_stride + j] - u : 0; + (params->r1 > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; #else // CONFIG_SKIP_SGR - const double f1 = (double)flt1[i * flt1_stride + j] - u; - const double f2 = (double)flt2[i * flt2_stride + j] - u; + const double f1 = (double)flt0[i * flt0_stride + j] - u; + const double f2 = (double)flt1[i * flt1_stride + j] - u; #endif // CONFIG_SKIP_SGR H[0][0] += f1 * f1; H[1][1] += f2 * f2; @@ -403,7 +403,7 @@ C[0] /= size; C[1] /= size; #if CONFIG_SKIP_SGR - if (params->r1 == 0) { + if (params->r0 == 0) { // H matrix is now only the scalar H[1][1] // C vector is now only the scalar C[1] Det = H[1][1]; @@ -413,7 +413,7 @@ xq[0] = 0; xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS)); - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { // H matrix is now only the scalar H[0][0] // C vector is now only the scalar C[0] Det = H[0][0]; @@ -444,11 +444,11 @@ #if CONFIG_SKIP_SGR void encode_xq(int *xq, int *xqd, const sgr_params_type *params) { - if (params->r1 == 0) { + if (params->r0 == 0) { xqd[0] = 0; xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xq[1], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1); - } else if (params->r2 == 0) { + } else if (params->r1 == 0) { xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0); xqd[1] = 0; } else { @@ -469,19 +469,19 @@ // Apply the self-guided filter across an entire restoration unit. 
static void apply_sgr(const sgr_params_type *params, const uint8_t *dat8, int width, int height, int dat_stride, int use_highbd, - int bit_depth, int pu_width, int pu_height, int32_t *flt1, - int32_t *flt2, int flt_stride) { + int bit_depth, int pu_width, int pu_height, int32_t *flt0, + int32_t *flt1, int flt_stride) { for (int i = 0; i < height; i += pu_height) { const int h = AOMMIN(pu_height, height - i); + int32_t *flt0_row = flt0 + i * flt_stride; int32_t *flt1_row = flt1 + i * flt_stride; - int32_t *flt2_row = flt2 + i * flt_stride; const uint8_t *dat8_row = dat8 + i * dat_stride; // Iterate over the stripe in blocks of width pu_width for (int j = 0; j < width; j += pu_width) { const int w = AOMMIN(pu_width, width - j); - av1_selfguided_restoration(dat8_row + j, w, h, dat_stride, flt1_row + j, - flt2_row + j, flt_stride, params, bit_depth, + av1_selfguided_restoration(dat8_row + j, w, h, dat_stride, flt0_row + j, + flt1_row + j, flt_stride, params, bit_depth, use_highbd); } } @@ -491,8 +491,8 @@ const uint8_t *dat8, int width, int height, int dat_stride, const uint8_t *src8, int src_stride, int use_highbitdepth, int bit_depth, int pu_width, int pu_height, int32_t *rstbuf) { - int32_t *flt1 = rstbuf; - int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt0 = rstbuf; + int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX; int ep, bestep = 0; int64_t besterr = -1; int exqd[2], bestxqd[2] = { 0, 0 }; @@ -507,15 +507,15 @@ int exq[2]; apply_sgr(params, dat8, width, height, dat_stride, use_highbitdepth, - bit_depth, pu_width, pu_height, flt1, flt2, flt_stride); + bit_depth, pu_width, pu_height, flt0, flt1, flt_stride); aom_clear_system_state(); #if CONFIG_SKIP_SGR get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride, - use_highbitdepth, flt1, flt_stride, flt2, flt_stride, exq, + use_highbitdepth, flt0, flt_stride, flt1, flt_stride, exq, params); #else // CONFIG_SKIP_SGR get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride, - 
use_highbitdepth, flt1, flt_stride, flt2, flt_stride, + use_highbitdepth, flt0, flt_stride, flt1, flt_stride, exq); #endif // CONFIG_SKIP_SGR aom_clear_system_state(); @@ -523,12 +523,12 @@ encode_xq(exq, exqd, params); int64_t err = finer_search_pixel_proj_error( src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, - flt1, flt_stride, flt2, flt_stride, 2, exqd, params); + flt0, flt_stride, flt1, flt_stride, 2, exqd, params); #else // CONFIG_SKIP_SGR encode_xq(exq, exqd); int64_t err = finer_search_pixel_proj_error( src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, - flt1, flt_stride, flt2, flt_stride, 2, exqd); + flt0, flt_stride, flt1, flt_stride, 2, exqd); #endif // CONFIG_SKIP_SGR if (besterr == -1 || err < besterr) { bestep = ep; @@ -550,12 +550,12 @@ int bits = SGRPROJ_PARAMS_BITS; #if CONFIG_SKIP_SGR const sgr_params_type *params = &sgr_params[sgrproj_info->ep]; - if (params->r1 > 0) + if (params->r0 > 0) bits += aom_count_primitive_refsubexpfin( SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0); - if (params->r2 > 0) + if (params->r1 > 0) bits += aom_count_primitive_refsubexpfin( SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,