Fix Visual Studio build error in CDEF filtering for WIN32 build
The Visual Studio 32-bit build fails with compiler errors when a mix of
128-bit vector data types is used. This CL resolves the errors by
replacing the v128 type and the v128_* wrapper calls with the native
__m128i type and the equivalent _mm_* intrinsics.
Change-Id: I9905e964db714daef879d0b931ac073bc764758c
diff --git a/av1/common/cdef_block_avx2.c b/av1/common/cdef_block_avx2.c
index 7f4caf7..51780e6 100644
--- a/av1/common/cdef_block_avx2.c
+++ b/av1/common/cdef_block_avx2.c
@@ -138,8 +138,10 @@
partial6 = _mm256_mullo_epi32(partial6, _mm256_set1_epi32(105));
partial4a = hsum4_avx2(&partial4a, &partial5a, &partial6, &partial7a);
- v128_store_unaligned(cost_frist_8x8, _mm256_castsi256_si128(partial4a));
- v128_store_unaligned(cost_second_8x8, _mm256_extractf128_si256(partial4a, 1));
+ _mm_storeu_si128((__m128i *)cost_frist_8x8,
+ _mm256_castsi256_si128(partial4a));
+ _mm_storeu_si128((__m128i *)cost_second_8x8,
+ _mm256_extractf128_si256(partial4a, 1));
return partial4a;
}
@@ -220,22 +222,22 @@
const __m128i first_8x8_output = _mm256_castsi256_si128(max);
const __m128i second_8x8_output = _mm256_extractf128_si256(max, 1);
const __m128i cmpeg_res_00 =
- v128_cmpeq_32(first_8x8_output, _mm256_castsi256_si128(dir47));
+ _mm_cmpeq_epi32(first_8x8_output, _mm256_castsi256_si128(dir47));
const __m128i cmpeg_res_01 =
- v128_cmpeq_32(first_8x8_output, _mm256_castsi256_si128(dir03));
+ _mm_cmpeq_epi32(first_8x8_output, _mm256_castsi256_si128(dir03));
const __m128i cmpeg_res_10 =
- v128_cmpeq_32(second_8x8_output, _mm256_extractf128_si256(dir47, 1));
+ _mm_cmpeq_epi32(second_8x8_output, _mm256_extractf128_si256(dir47, 1));
const __m128i cmpeg_res_11 =
- v128_cmpeq_32(second_8x8_output, _mm256_extractf128_si256(dir03, 1));
- const v128 t_first_8x8 = v128_pack_s32_s16(cmpeg_res_00, cmpeg_res_01);
- const v128 t_second_8x8 = v128_pack_s32_s16(cmpeg_res_10, cmpeg_res_11);
+ _mm_cmpeq_epi32(second_8x8_output, _mm256_extractf128_si256(dir03, 1));
+ const __m128i t_first_8x8 = _mm_packs_epi32(cmpeg_res_01, cmpeg_res_00);
+ const __m128i t_second_8x8 = _mm_packs_epi32(cmpeg_res_11, cmpeg_res_10);
- best_cost[0] = v128_low_u32(_mm256_castsi256_si128(max));
- best_cost[1] = v128_low_u32(second_8x8_output);
- best_dir[0] = v128_movemask_8(v128_pack_s16_s8(t_first_8x8, t_first_8x8));
+ best_cost[0] = _mm_cvtsi128_si32(_mm256_castsi256_si128(max));
+ best_cost[1] = _mm_cvtsi128_si32(second_8x8_output);
+ best_dir[0] = _mm_movemask_epi8(_mm_packs_epi16(t_first_8x8, t_first_8x8));
best_dir[0] =
get_msb(best_dir[0] ^ (best_dir[0] - 1)); // Count trailing zeros
- best_dir[1] = v128_movemask_8(v128_pack_s16_s8(t_second_8x8, t_second_8x8));
+ best_dir[1] = _mm_movemask_epi8(_mm_packs_epi16(t_second_8x8, t_second_8x8));
best_dir[1] =
get_msb(best_dir[1] ^ (best_dir[1] - 1)); // Count trailing zeros