Fix bug in selfguided_restoration neon
Fixed bugs caused by the use of uninitialized memory in the
av1_apply_selfguided_restoration_neon and restoration_internal
functions.
BUG=b/141859709
BUG=b/141858830
Change-Id: I3359d0c8fda16f7e74296e6d34fb2070462dedf5
diff --git a/av1/common/arm/selfguided_neon.c b/av1/common/arm/selfguided_neon.c
index d1e93d7..5593bcb 100644
--- a/av1/common/arm/selfguided_neon.c
+++ b/av1/common/arm/selfguided_neon.c
@@ -376,6 +376,21 @@
w -= 8;
count++;
} while (w > 0);
+
+ // memset needed for row pixels as 2nd stage of boxsum filter uses
+ // first 2 rows of dst16, dst2 buffer which is not filled in first stage.
+ for (int x = 0; x < 2; x++) {
+ memset(dst16 + x * dst_stride, 0, (width + 4) * sizeof(*dst16));
+ memset(dst2 + x * dst_stride, 0, (width + 4) * sizeof(*dst2));
+ }
+
+ // memset needed for extra columns as 2nd stage of boxsum filter uses
+ // last 2 columns of dst16, dst2 buffer which is not filled in first stage.
+ for (int x = 2; x < height + 2; x++) {
+ int dst_offset = x * dst_stride + width + 2;
+ memset(dst16 + dst_offset, 0, 3 * sizeof(*dst16));
+ memset(dst2 + dst_offset, 0, 3 * sizeof(*dst2));
+ }
}
{
@@ -792,6 +807,21 @@
w -= 8;
count++;
} while (w > 0);
+
+ // memset needed for row pixels as 2nd stage of boxsum filter uses
+ // first 2 rows of dst1, dst2 buffer which is not filled in first stage.
+ for (int x = 0; x < 2; x++) {
+ memset(dst1 + x * dst_stride, 0, (width + 4) * sizeof(*dst1));
+ memset(dst2 + x * dst_stride, 0, (width + 4) * sizeof(*dst2));
+ }
+
+ // memset needed for extra columns as 2nd stage of boxsum filter uses
+ // last 2 columns of dst1, dst2 buffer which is not filled in first stage.
+ for (int x = 2; x < height + 2; x++) {
+ int dst_offset = x * dst_stride + width + 2;
+ memset(dst1 + dst_offset, 0, 3 * sizeof(*dst1));
+ memset(dst2 + dst_offset, 0, 3 * sizeof(*dst2));
+ }
}
{
@@ -1319,6 +1349,11 @@
dst_ptr[y + x * dst_stride] = src_ptr[y + x * src_stride];
}
}
+
+ // memset the uninitialized rows of the src buffer, as they are needed for
+ // the boxsum filter calculation.
+ for (int x = height; x < height + 5; x++)
+ memset(dst + x * dst_stride, 0, (width + 2) * sizeof(*dst));
}
#if CONFIG_AV1_HIGHBITDEPTH
@@ -1360,6 +1395,10 @@
memcpy((dst_ptr + x * dst_stride), (src_ptr + x * src_stride),
sizeof(uint16_t) * width);
}
+ // memset the uninitialized rows of the src buffer, as they are needed for
+ // the boxsum filter calculation.
+ for (int x = height; x < height + 5; x++)
+ memset(dst + x * dst_stride, 0, (width + 2) * sizeof(*dst));
}
#endif // CONFIG_AV1_HIGHBITDEPTH
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index c31b0c0..8db1d2f 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -402,7 +402,7 @@
add_proto qw/void av1_apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
# TODO(b/141858830,b/141859709): neon is currently disabled due to use of
# uninitialized memory.
-specialize qw/av1_apply_selfguided_restoration sse4_1 avx2/;
+specialize qw/av1_apply_selfguided_restoration sse4_1 avx2 neon/;
add_proto qw/int av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
diff --git a/test/selfguided_filter_test.cc b/test/selfguided_filter_test.cc
index 8446d85..c1007f1 100644
--- a/test/selfguided_filter_test.cc
+++ b/test/selfguided_filter_test.cc
@@ -412,7 +412,7 @@
#endif
// TODO(b/141858830,b/141859709): neon is currently disabled due to use of
// uninitialized memory.
-#if 0 // HAVE_NEON
+#if HAVE_NEON
const int highbd_params_neon[] = { 8, 10, 12 };
INSTANTIATE_TEST_CASE_P(
NEON, AV1HighbdSelfguidedFilterTest,