Fix off-by-one bug in Neon sub-pixel variance functions
The Neon sub-pixel variance functions were processing height + 1 rows
of data when applying the horizontal filter (bilinear or averaging) in
the case where no vertical filtering is used subsequently, i.e. when
yoffset == 0.
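This patch fixes the off-by-one bug by filtering only h rows when
yoffset == 0; the (h + padding)-row horizontal pass is now performed
only in the branches that apply a vertical filter afterwards. This also
makes the code a bit faster.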
Bug: aomedia:3368
Change-Id: Ic9e4a207afc03b75554fe1d74eda7b31b77b96e9
diff --git a/aom_dsp/arm/subpel_variance_neon.c b/aom_dsp/arm/subpel_variance_neon.c
index cea1a4c..a058860 100644
--- a/aom_dsp/arm/subpel_variance_neon.c
+++ b/aom_dsp/arm/subpel_variance_neon.c
@@ -174,30 +174,35 @@
} \
} else if (xoffset == 4) { \
uint8_t tmp0[w * (h + padding)]; \
- var_filter_block2d_avg(src, tmp0, src_stride, 1, w, h + padding); \
if (yoffset == 0) { \
+ var_filter_block2d_avg(src, tmp0, src_stride, 1, w, h); \
return aom_variance##w##x##h##_neon(tmp0, w, ref, ref_stride, sse); \
} else if (yoffset == 4) { \
uint8_t tmp1[w * (h + padding)]; \
+ var_filter_block2d_avg(src, tmp0, src_stride, 1, w, (h + padding)); \
var_filter_block2d_avg(tmp0, tmp1, w, w, w, h); \
return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
} else { \
uint8_t tmp1[w * (h + padding)]; \
+ var_filter_block2d_avg(src, tmp0, src_stride, 1, w, (h + padding)); \
var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
} \
} else { \
uint8_t tmp0[w * (h + padding)]; \
- var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding), \
- xoffset); \
if (yoffset == 0) { \
+ var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, h, xoffset); \
return aom_variance##w##x##h##_neon(tmp0, w, ref, ref_stride, sse); \
} else if (yoffset == 4) { \
uint8_t tmp1[w * h]; \
+ var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding), \
+ xoffset); \
var_filter_block2d_avg(tmp0, tmp1, w, w, w, h); \
return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
} else { \
uint8_t tmp1[w * h]; \
+ var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding), \
+ xoffset); \
var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
} \
@@ -467,17 +472,17 @@
} else if (xoffset == 4) { \
uint8_t tmp0[w * (h + padding)]; \
if (yoffset == 0) { \
- avg_pred_var_filter_block2d_avg(src, tmp0, source_stride, 1, w, \
- h + padding, second_pred); \
+ avg_pred_var_filter_block2d_avg(src, tmp0, source_stride, 1, w, h, \
+ second_pred); \
return aom_variance##w##x##h##_neon(tmp0, w, ref, ref_stride, sse); \
} else if (yoffset == 4) { \
uint8_t tmp1[w * (h + padding)]; \
- var_filter_block2d_avg(src, tmp0, source_stride, 1, w, h + padding); \
+ var_filter_block2d_avg(src, tmp0, source_stride, 1, w, (h + padding)); \
avg_pred_var_filter_block2d_avg(tmp0, tmp1, w, w, w, h, second_pred); \
return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
} else { \
uint8_t tmp1[w * (h + padding)]; \
- var_filter_block2d_avg(src, tmp0, source_stride, 1, w, h + padding); \
+ var_filter_block2d_avg(src, tmp0, source_stride, 1, w, (h + padding)); \
avg_pred_var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset, \
second_pred); \
return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
@@ -485,8 +490,8 @@
} else { \
uint8_t tmp0[w * (h + padding)]; \
if (yoffset == 0) { \
- avg_pred_var_filter_block2d_bil_w##w( \
- src, tmp0, source_stride, 1, (h + padding), xoffset, second_pred); \
+ avg_pred_var_filter_block2d_bil_w##w(src, tmp0, source_stride, 1, h, \
+ xoffset, second_pred); \
return aom_variance##w##x##h##_neon(tmp0, w, ref, ref_stride, sse); \
} else if (yoffset == 4) { \
uint8_t tmp1[w * h]; \