Bugfix for AddressSanitizer error with MRLS

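For 12-bit input, the code is supposed to use 32-bit intrinsics, but 16-bit
intrinsics were wrongly used. The fix is straightforward: replace the 16-bit
instructions with their 32-bit counterparts. In addition, under CONFIG_MRLS,
reconintra.c changes NUM_INTRA_NEIGHBOUR_PIXELS to MAX_TX_SIZE * 2 + 64 and
moves the above_row/left_col base offsets from 16 to 32.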

BUG=aomedia:3108

Change-Id: Idd0f3c746b3909a21bba61b0a78b8401223dee20
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c
index 8dca12e..23c5b2b 100644
--- a/aom_dsp/x86/intrapred_avx2.c
+++ b/aom_dsp/x86/intrapred_avx2.c
@@ -2287,7 +2287,7 @@
       c1234 = _mm_setr_epi32(1, 2, 3, 4);
 #if CONFIG_MRLS
       __m128i c1234_ = _mm_add_epi32(c1234, cmrlIdx);
-      y_c128 = _mm_sub_epi32(r6, _mm_mullo_epi16(c1234_, dy128));
+      y_c128 = _mm_sub_epi32(r6, _mm_mullo_epi32(c1234_, dy128));
 #else
       y_c128 = _mm_sub_epi32(r6, _mm_mullo_epi32(c1234, dy128));
 #endif
@@ -2911,13 +2911,13 @@
   a16 = _mm256_set1_epi32(16);
   c1 = _mm256_srli_epi32(a16, 4);
   c8 = _mm256_srli_epi32(a16, 1);
-  min_base_y256 = _mm256_set1_epi16(min_base_y);
+  min_base_y256 = _mm256_set1_epi32(min_base_y);
   c3f = _mm256_set1_epi32(0x3f);
   dy256 = _mm256_set1_epi32(dy);
   c0123 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
   c1234 = _mm256_add_epi32(c0123, c1);
 #if CONFIG_MRLS
-  __m256i cmrlIdx = _mm256_set1_epi16(mrl_index);
+  __m256i cmrlIdx = _mm256_set1_epi32(mrl_index);
 #endif
   for (int r = 0; r < H; r++) {
     __m256i b, res, shift, ydx;
@@ -3020,7 +3020,7 @@
         r6 = _mm256_set1_epi32(r << 6);
         c256 = _mm256_add_epi32(j256, c1234);
 #if CONFIG_MRLS
-        __m256i c256_ = _mm256_add_epi16(c256, cmrlIdx);
+        __m256i c256_ = _mm256_add_epi32(c256, cmrlIdx);
         y_c256 = _mm256_sub_epi32(r6, _mm256_mullo_epi32(c256_, dy256));
 #else
         y_c256 = _mm256_sub_epi32(r6, _mm256_mullo_epi32(c256, dy256));
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 7668158..73197b8 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -36,7 +36,7 @@
 #define INTRA_EDGE_TAPS 5
 #define MAX_UPSAMPLE_SZ 16
 #if CONFIG_MRLS
-#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32 + 2 * MRL_LINE_NUMBER)
+#define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 64)
 #else
 #define NUM_INTRA_NEIGHBOUR_PIXELS (MAX_TX_SIZE * 2 + 32)
 #endif
@@ -1286,8 +1286,13 @@
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
   DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
+#if CONFIG_MRLS
+  uint16_t *const above_row = above_data + 32;
+  uint16_t *const left_col = left_data + 32;
+#else
   uint16_t *const above_row = above_data + 16;
   uint16_t *const left_col = left_data + 16;
+#endif
   const int txwpx = tx_size_wide[tx_size];
   const int txhpx = tx_size_high[tx_size];
   int need_left = extend_modes[mode] & NEED_LEFT;
@@ -1585,8 +1590,13 @@
 #endif
   DECLARE_ALIGNED(16, uint8_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   DECLARE_ALIGNED(16, uint8_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
+#if CONFIG_MRLS
+  uint8_t *const above_row = above_data + 32;
+  uint8_t *const left_col = left_data + 32;
+#else
   uint8_t *const above_row = above_data + 16;
   uint8_t *const left_col = left_data + 16;
+#endif
   const int txwpx = tx_size_wide[tx_size];
   const int txhpx = tx_size_high[tx_size];
   int need_left = extend_modes[mode] & NEED_LEFT;