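Fix a crash in the MSVC x86 build

Pass the __m256i sums to aggregate_and_store_sum() by const pointer
instead of by value, and update all callers accordingly.

BUG=aomedia:3374
Change-Id: I63d6d9a803e996d6b942b4ea4d346042bc72dfe1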
diff --git a/aom_dsp/x86/sad4d_avx2.c b/aom_dsp/x86/sad4d_avx2.c
index adfbd43..0fea6dd 100644
--- a/aom_dsp/x86/sad4d_avx2.c
+++ b/aom_dsp/x86/sad4d_avx2.c
@@ -16,29 +16,28 @@
#include "aom_dsp/x86/synonyms_avx2.h"
static AOM_FORCE_INLINE void aggregate_and_store_sum(uint32_t res[4],
- __m256i sum_ref0,
- __m256i sum_ref1,
- __m256i sum_ref2,
- __m256i sum_ref3) {
- __m128i sum;
+ const __m256i *sum_ref0,
+ const __m256i *sum_ref1,
+ const __m256i *sum_ref2,
+ const __m256i *sum_ref3) {
// In sum_ref-i the result is saved in the first 4 bytes and the other 4
// bytes are zeroed.
// merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3
// 0, 0, 1, 1
- sum_ref0 = _mm256_castps_si256(_mm256_shuffle_ps(
- _mm256_castsi256_ps(sum_ref0), _mm256_castsi256_ps(sum_ref1),
+ __m256i sum_ref01 = _mm256_castps_si256(_mm256_shuffle_ps(
+ _mm256_castsi256_ps(*sum_ref0), _mm256_castsi256_ps(*sum_ref1),
_MM_SHUFFLE(2, 0, 2, 0)));
// 2, 2, 3, 3
- sum_ref2 = _mm256_castps_si256(_mm256_shuffle_ps(
- _mm256_castsi256_ps(sum_ref2), _mm256_castsi256_ps(sum_ref3),
+ __m256i sum_ref23 = _mm256_castps_si256(_mm256_shuffle_ps(
+ _mm256_castsi256_ps(*sum_ref2), _mm256_castsi256_ps(*sum_ref3),
_MM_SHUFFLE(2, 0, 2, 0)));
// sum adjacent 32 bit integers
- sum_ref0 = _mm256_hadd_epi32(sum_ref0, sum_ref2);
+ __m256i sum_ref0123 = _mm256_hadd_epi32(sum_ref01, sum_ref23);
// add the low 128 bit to the high 128 bit
- sum = _mm_add_epi32(_mm256_castsi256_si128(sum_ref0),
- _mm256_extractf128_si256(sum_ref0, 1));
+ __m128i sum = _mm_add_epi32(_mm256_castsi256_si128(sum_ref0123),
+ _mm256_extractf128_si256(sum_ref0123, 1));
_mm_storeu_si128((__m128i *)(res), sum);
}
@@ -87,7 +86,7 @@
ref3 += ref_stride;
}
- aggregate_and_store_sum(res, sum_ref0, sum_ref1, sum_ref2, sum_ref3);
+ aggregate_and_store_sum(res, &sum_ref0, &sum_ref1, &sum_ref2, &sum_ref3);
}
static AOM_FORCE_INLINE void aom_sadMxNx3d_avx2(
@@ -97,6 +96,7 @@
__m256i sum_ref0, sum_ref1, sum_ref2;
int i, j;
const uint8_t *ref0, *ref1, *ref2;
+ const __m256i zero = _mm256_setzero_si256();
ref0 = ref[0];
ref1 = ref[1];
@@ -127,8 +127,7 @@
ref1 += ref_stride;
ref2 += ref_stride;
}
- aggregate_and_store_sum(res, sum_ref0, sum_ref1, sum_ref2,
- _mm256_setzero_si256());
+ aggregate_and_store_sum(res, &sum_ref0, &sum_ref1, &sum_ref2, &zero);
}
#define SADMXN_AVX2(m, n) \
@@ -189,6 +188,7 @@
__m256i src_reg, ref0_reg, ref1_reg, ref2_reg;
__m256i sum_ref0, sum_ref1, sum_ref2;
const uint8_t *ref0, *ref1, *ref2;
+ const __m256i zero = _mm256_setzero_si256();
assert(N % 2 == 0);
ref0 = ref[0];
@@ -221,8 +221,7 @@
ref2 += 2 * ref_stride;
}
- aggregate_and_store_sum(res, sum_ref0, sum_ref1, sum_ref2,
- _mm256_setzero_si256());
+ aggregate_and_store_sum(res, &sum_ref0, &sum_ref1, &sum_ref2, &zero);
}
static AOM_FORCE_INLINE void aom_sad16xNx4d_avx2(int N, const uint8_t *src,
@@ -272,7 +271,7 @@
ref3 += 2 * ref_stride;
}
- aggregate_and_store_sum(res, sum_ref0, sum_ref1, sum_ref2, sum_ref3);
+ aggregate_and_store_sum(res, &sum_ref0, &sum_ref1, &sum_ref2, &sum_ref3);
}
#define SAD16XNX3_AVX2(n) \