Bugfix for AddressSanitizer error with MRLS. For 12-bit input, the code is supposed to use 32-bit intrinsics, but 16-bit intrinsics were wrongly used. The fix is straightforward: it changes the 16-bit instructions to 32-bit instructions. BUG=aomedia:3108 Change-Id: Idd0f3c746b3909a21bba61b0a78b8401223dee20
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c index 8dca12e..23c5b2b 100644 --- a/aom_dsp/x86/intrapred_avx2.c +++ b/aom_dsp/x86/intrapred_avx2.c
@@ -2287,7 +2287,7 @@ c1234 = _mm_setr_epi32(1, 2, 3, 4); #if CONFIG_MRLS __m128i c1234_ = _mm_add_epi32(c1234, cmrlIdx); - y_c128 = _mm_sub_epi32(r6, _mm_mullo_epi16(c1234_, dy128)); + y_c128 = _mm_sub_epi32(r6, _mm_mullo_epi32(c1234_, dy128)); #else y_c128 = _mm_sub_epi32(r6, _mm_mullo_epi32(c1234, dy128)); #endif @@ -2911,13 +2911,13 @@ a16 = _mm256_set1_epi32(16); c1 = _mm256_srli_epi32(a16, 4); c8 = _mm256_srli_epi32(a16, 1); - min_base_y256 = _mm256_set1_epi16(min_base_y); + min_base_y256 = _mm256_set1_epi32(min_base_y); c3f = _mm256_set1_epi32(0x3f); dy256 = _mm256_set1_epi32(dy); c0123 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); c1234 = _mm256_add_epi32(c0123, c1); #if CONFIG_MRLS - __m256i cmrlIdx = _mm256_set1_epi16(mrl_index); + __m256i cmrlIdx = _mm256_set1_epi32(mrl_index); #endif for (int r = 0; r < H; r++) { __m256i b, res, shift, ydx; @@ -3020,7 +3020,7 @@ r6 = _mm256_set1_epi32(r << 6); c256 = _mm256_add_epi32(j256, c1234); #if CONFIG_MRLS - __m256i c256_ = _mm256_add_epi16(c256, cmrlIdx); + __m256i c256_ = _mm256_add_epi32(c256, cmrlIdx); y_c256 = _mm256_sub_epi32(r6, _mm256_mullo_epi32(c256_, dy256)); #else y_c256 = _mm256_sub_epi32(r6, _mm256_mullo_epi32(c256, dy256));
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c index 7668158..73197b8 100644 --- a/av1/common/reconintra.c +++ b/av1/common/reconintra.c
@@ -36,7 +36,7 @@ #define INTRA_EDGE_TAPS 5 #define MAX_UPSAMPLE_SZ 16 #if CONFIG_MRLS -#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32 + 2 * MRL_LINE_NUMBER) +#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 64) #else #define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32) #endif @@ -1286,8 +1286,13 @@ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]); DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]); +#if CONFIG_MRLS + uint16_t *const above_row = above_data + 32; + uint16_t *const left_col = left_data + 32; +#else uint16_t *const above_row = above_data + 16; uint16_t *const left_col = left_data + 16; +#endif const int txwpx = tx_size_wide[tx_size]; const int txhpx = tx_size_high[tx_size]; int need_left = extend_modes[mode] & NEED_LEFT; @@ -1585,8 +1590,13 @@ #endif DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]); DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]); +#if CONFIG_MRLS + uint8_t *const above_row = above_data + 32; + uint8_t *const left_col = left_data + 32; +#else uint8_t *const above_row = above_data + 16; uint8_t *const left_col = left_data + 16; +#endif const int txwpx = tx_size_wide[tx_size]; const int txhpx = tx_size_high[tx_size]; int need_left = extend_modes[mode] & NEED_LEFT;