Resolve -Wshorten-64-to-32 in highbd variance.
For 8-bit the subtrahend is small enough to fit into uint32_t.
cherry-picked from libvpx:
47b9a0912 Resolve -Wshorten-64-to-32 in highbd variance.
This is the same change that was done for:
c0241664a Resolve -Wshorten-64-to-32 in variance.
For 10/12-bit apply:
63a37d16f Prevent negative variance
BUG=aomedia:445
Change-Id: Iab35e3f3f269035e17c711bd6cc01272c3137e1d
diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index c5fb495..55fac22 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c
@@ -366,7 +366,7 @@
uint32_t *sse) { \
int sum; \
highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
} \
\
uint32_t aom_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index 26b99b9..29f96ce 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c
@@ -140,7 +140,7 @@
highbd_8_variance_sse2( \
src, src_stride, ref, ref_stride, w, h, sse, &sum, \
aom_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
- return *sse - (((int64_t)sum * sum) >> shift); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \
} \
\
uint32_t aom_highbd_10_variance##w##x##h##_sse2( \
@@ -298,12 +298,13 @@
} \
} \
*sse_ptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ return sse - (uint32_t)((cast se * se) >> (wlog2 + hlog2)); \
} \
\
uint32_t aom_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr) { \
+ int64_t var; \
uint32_t sse; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
@@ -333,7 +334,8 @@
se = ROUND_POWER_OF_TWO(se, 2); \
sse = ROUND_POWER_OF_TWO(sse, 4); \
*sse_ptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
} \
\
uint32_t aom_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
@@ -342,6 +344,7 @@
int start_row; \
uint32_t sse; \
int se = 0; \
+ int64_t var; \
uint64_t long_sse = 0; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
@@ -380,7 +383,8 @@
se = ROUND_POWER_OF_TWO(se, 4); \
sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
*sse_ptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
}
#define FNS(opt) \
@@ -449,13 +453,14 @@
} \
} \
*sse_ptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ return sse - (uint32_t)((cast se * se) >> (wlog2 + hlog2)); \
} \
\
uint32_t aom_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
const uint8_t *src8, int src_stride, int x_offset, int y_offset, \
const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
const uint8_t *sec8) { \
+ int64_t var; \
uint32_t sse; \
uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
@@ -486,7 +491,8 @@
se = ROUND_POWER_OF_TWO(se, 2); \
sse = ROUND_POWER_OF_TWO(sse, 4); \
*sse_ptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
} \
\
uint32_t aom_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
@@ -494,6 +500,7 @@
const uint8_t *dst8, int dst_stride, uint32_t *sse_ptr, \
const uint8_t *sec8) { \
int start_row; \
+ int64_t var; \
uint32_t sse; \
int se = 0; \
uint64_t long_sse = 0; \
@@ -535,7 +542,8 @@
se = ROUND_POWER_OF_TWO(se, 4); \
sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
*sse_ptr = sse; \
- return sse - ((cast se * se) >> (wlog2 + hlog2)); \
+ var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \
+ return (var >= 0) ? (uint32_t)var : 0; \
}
#define FNS(opt1) \