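Don't use _mm_cvtsi128_si64 on 32-bit systems

_mm_cvtsi128_si64 is only available when targeting x86-64, so take the
SSE4.1 fast path in the 16-bit dot-product helpers only when __x86_64__
is also defined; 32-bit builds use the existing fallback path instead.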

Change-Id: I332afb8d9e35cd60f05915160a5b2e1dc8757de5
diff --git a/aom_dsp/simd/v128_intrinsics_x86.h b/aom_dsp/simd/v128_intrinsics_x86.h
index 9a10f20..8319f03 100644
--- a/aom_dsp/simd/v128_intrinsics_x86.h
+++ b/aom_dsp/simd/v128_intrinsics_x86.h
@@ -270,7 +270,7 @@
 
 SIMD_INLINE int64_t v128_dotp_s16(v128 a, v128 b) {
   v128 r = _mm_madd_epi16(a, b);
-#if defined(__SSE4_1__)
+#if defined(__SSE4_1__) && defined(__x86_64__)
   v128 c = _mm_add_epi64(_mm_cvtepi32_epi64(r),
                          _mm_cvtepi32_epi64(_mm_srli_si128(r, 8)));
   return _mm_cvtsi128_si64(_mm_add_epi64(c, _mm_srli_si128(c, 8)));
diff --git a/aom_dsp/simd/v64_intrinsics_x86.h b/aom_dsp/simd/v64_intrinsics_x86.h
index 09eb160..bef43c4 100644
--- a/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/aom_dsp/simd/v64_intrinsics_x86.h
@@ -265,7 +265,7 @@
 
 SIMD_INLINE int64_t v64_dotp_s16(v64 a, v64 b) {
   __m128i r = _mm_madd_epi16(a, b);
-#if defined(__SSE4_1__)
+#if defined(__SSE4_1__) && defined(__x86_64__)
   __m128i x = _mm_cvtepi32_epi64(r);
   return _mm_cvtsi128_si64(_mm_add_epi64(x, _mm_srli_si128(x, 8)));
 #else