Correct sub pel offset checks for half pel point
In "aom_sub_pixel_variance32xh_avx2" function, sub pel offset
is being checked with 8 to handle special cases of half pel
which is incorrect.
Above is modified to check for an offset of 4 (which is half pel).
Change-Id: I299d73e0a9277c06235936ed0b6ab6dfd95652f1
diff --git a/aom_dsp/x86/variance_impl_avx2.c b/aom_dsp/x86/variance_impl_avx2.c
index 88e27ae..aef012b 100644
--- a/aom_dsp/x86/variance_impl_avx2.c
+++ b/aom_dsp/x86/variance_impl_avx2.c
@@ -127,8 +127,8 @@
src += src_stride;
dst += dst_stride;
}
- // x_offset = 0 and y_offset = 8
- } else if (y_offset == 8) {
+ // x_offset = 0 and y_offset = 4
+ } else if (y_offset == 4) {
__m256i src_next_reg;
for (i = 0; i < height; i++) {
LOAD_SRC_DST
@@ -156,8 +156,8 @@
dst += dst_stride;
}
}
- // x_offset = 8 and y_offset = 0
- } else if (x_offset == 8) {
+ // x_offset = 4 and y_offset = 0
+ } else if (x_offset == 4) {
if (y_offset == 0) {
__m256i src_next_reg;
for (i = 0; i < height; i++) {
@@ -169,8 +169,8 @@
src += src_stride;
dst += dst_stride;
}
- // x_offset = 8 and y_offset = 8
- } else if (y_offset == 8) {
+ // x_offset = 4 and y_offset = 4
+ } else if (y_offset == 4) {
__m256i src_next_reg, src_avg;
// load source and another source starting from the next
// following byte
@@ -189,7 +189,7 @@
CALC_SUM_SSE_INSIDE_LOOP
dst += dst_stride;
}
- // x_offset = 8 and y_offset = bilin interpolation
+ // x_offset = 4 and y_offset = bilin interpolation
} else {
__m256i filter, pw8, src_next_reg, src_avg;
y_offset <<= 5;
@@ -228,8 +228,8 @@
src += src_stride;
dst += dst_stride;
}
- // x_offset = bilin interpolation and y_offset = 8
- } else if (y_offset == 8) {
+ // x_offset = bilin interpolation and y_offset = 4
+ } else if (y_offset == 4) {
__m256i filter, pw8, src_next_reg, src_pack;
x_offset <<= 5;
filter = _mm256_load_si256(