Fix unaligned load in av1_dist_wtd_convolve_2d_copy_avx2

Change-Id: Ibe1ba018487801d74a3c7684ce72e11af89685b6
diff --git a/av1/common/x86/jnt_convolve_avx2.c b/av1/common/x86/jnt_convolve_avx2.c
index 6ec649e..ae8f88e 100644
--- a/av1/common/x86/jnt_convolve_avx2.c
+++ b/av1/common/x86/jnt_convolve_avx2.c
@@ -1051,10 +1051,10 @@
     assert(w == 4);                                                           \
     do {                                                                      \
       __m256i src_3210_8bit =                                                 \
-          _mm256_setr_epi32(*(int32_t *)(src + 0 * src_stride),               \
-                            *(int32_t *)(src + 1 * src_stride), 0, 0,         \
-                            *(int32_t *)(src + 2 * src_stride),               \
-                            *(int32_t *)(src + 3 * src_stride), 0, 0);        \
+          _mm256_setr_epi32(loadu_int32(src + 0 * src_stride),                \
+                            loadu_int32(src + 1 * src_stride), 0, 0,          \
+                            loadu_int32(src + 2 * src_stride),                \
+                            loadu_int32(src + 3 * src_stride), 0, 0);         \
                                                                               \
       __m256i src_3210 = _mm256_unpacklo_epi8(src_3210_8bit, zero);           \
       src_3210 = _mm256_slli_epi16(src_3210, LEFT_SHIFT);                     \