Fix ubsan_undefined error due to misaligned load

As reported in issue 1486, there is a misaligned load
in the function av1_convolve_2d_copy_sr_sse2, which causes
a UBSan error when built with -DSANITIZE=undefined.

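This commit fixes the issue by replacing the direct
uint16_t/uint32_t pointer dereferences of src in the
w == 2 and w == 4 paths with _mm_loadl_epi64 loads,
which do not require an aligned address.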

BUG=aomedia:1486

Change-Id: Ide3312213d8469dc8ae14c2c5437c79c3bb1ac20
diff --git a/av1/common/x86/convolve_2d_sse2.c b/av1/common/x86/convolve_2d_sse2.c
index 5e0502d..61c6578 100644
--- a/av1/common/x86/convolve_2d_sse2.c
+++ b/av1/common/x86/convolve_2d_sse2.c
@@ -577,20 +577,24 @@
 
   if (w == 2) {
     do {
-      *(uint16_t *)dst = *(uint16_t *)src;
+      __m128i s = _mm_loadl_epi64((__m128i *)src);
+      *(uint16_t *)dst = _mm_cvtsi128_si32(s);
       src += src_stride;
       dst += dst_stride;
-      *(uint16_t *)dst = *(uint16_t *)src;
+      s = _mm_loadl_epi64((__m128i *)src);
+      *(uint16_t *)dst = _mm_cvtsi128_si32(s);
       src += src_stride;
       dst += dst_stride;
       h -= 2;
     } while (h);
   } else if (w == 4) {
     do {
-      *(uint32_t *)dst = *(uint32_t *)src;
+      __m128i s = _mm_loadl_epi64((__m128i *)src);
+      *(uint32_t *)dst = _mm_cvtsi128_si32(s);
       src += src_stride;
       dst += dst_stride;
-      *(uint32_t *)dst = *(uint32_t *)src;
+      s = _mm_loadl_epi64((__m128i *)src);
+      *(uint32_t *)dst = _mm_cvtsi128_si32(s);
       src += src_stride;
       dst += dst_stride;
       h -= 2;