Move functions into a header file

This commits move a couple of static functions into a header file to
avoid code duplication.

BUG=aomedia:2629

Change-Id: I568f0a886b41302c802106f233c6d6d096a45199
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake
index 0eed337..f1b61f0 100644
--- a/aom_dsp/aom_dsp.cmake
+++ b/aom_dsp/aom_dsp.cmake
@@ -67,6 +67,7 @@
             "${AOM_ROOT}/aom_dsp/x86/highbd_intrapred_sse2.c"
             "${AOM_ROOT}/aom_dsp/x86/highbd_loopfilter_sse2.c"
             "${AOM_ROOT}/aom_dsp/x86/intrapred_sse2.c"
+            "${AOM_ROOT}/aom_dsp/x86/intrapred_x86.h"
             "${AOM_ROOT}/aom_dsp/x86/loopfilter_sse2.c"
             "${AOM_ROOT}/aom_dsp/x86/lpf_common_sse2.h"
             "${AOM_ROOT}/aom_dsp/x86/mem_sse2.h"
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c
index fc39811..58789c3 100644
--- a/aom_dsp/x86/intrapred_avx2.c
+++ b/aom_dsp/x86/intrapred_avx2.c
@@ -12,6 +12,7 @@
 #include <immintrin.h>
 
 #include "config/aom_dsp_rtcd.h"
+#include "aom_dsp/x86/intrapred_x86.h"
 #include "aom_dsp/x86/lpf_common_sse2.h"
 
 static INLINE __m256i dc_sum_64(const uint8_t *ref) {
@@ -419,28 +420,6 @@
 
 // -----------------------------------------------------------------------------
 // Rectangle
-
-// TODO(luoyi) The following two functions are shared with intrapred_sse2.c.
-// Use a header file, intrapred_common_x86.h
-static INLINE __m128i dc_sum_16_sse2(const uint8_t *ref) {
-  __m128i x = _mm_load_si128((__m128i const *)ref);
-  const __m128i zero = _mm_setzero_si128();
-  x = _mm_sad_epu8(x, zero);
-  const __m128i high = _mm_unpackhi_epi64(x, x);
-  return _mm_add_epi16(x, high);
-}
-
-static INLINE __m128i dc_sum_32_sse2(const uint8_t *ref) {
-  __m128i x0 = _mm_load_si128((__m128i const *)ref);
-  __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
-  const __m128i zero = _mm_setzero_si128();
-  x0 = _mm_sad_epu8(x0, zero);
-  x1 = _mm_sad_epu8(x1, zero);
-  x0 = _mm_add_epi16(x0, x1);
-  const __m128i high = _mm_unpackhi_epi64(x0, x0);
-  return _mm_add_epi16(x0, high);
-}
-
 void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
   const __m128i top_sum = dc_sum_32_sse2(above);
diff --git a/aom_dsp/x86/intrapred_sse2.c b/aom_dsp/x86/intrapred_sse2.c
index 5b2452c..5afef68 100644
--- a/aom_dsp/x86/intrapred_sse2.c
+++ b/aom_dsp/x86/intrapred_sse2.c
@@ -10,7 +10,7 @@
  */
 
 #include <emmintrin.h>
-
+#include "aom_dsp/x86/intrapred_x86.h"
 #include "config/aom_dsp_rtcd.h"
 
 static INLINE void dc_store_4xh(uint32_t dc, int height, uint8_t *dst,
@@ -75,25 +75,6 @@
   return _mm_sad_epu8(x, zero);
 }
 
-static INLINE __m128i dc_sum_16(const uint8_t *ref) {
-  __m128i x = _mm_load_si128((__m128i const *)ref);
-  const __m128i zero = _mm_setzero_si128();
-  x = _mm_sad_epu8(x, zero);
-  const __m128i high = _mm_unpackhi_epi64(x, x);
-  return _mm_add_epi16(x, high);
-}
-
-static INLINE __m128i dc_sum_32(const uint8_t *ref) {
-  __m128i x0 = _mm_load_si128((__m128i const *)ref);
-  __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
-  const __m128i zero = _mm_setzero_si128();
-  x0 = _mm_sad_epu8(x0, zero);
-  x1 = _mm_sad_epu8(x1, zero);
-  x0 = _mm_add_epi16(x0, x1);
-  const __m128i high = _mm_unpackhi_epi64(x0, x0);
-  return _mm_add_epi16(x0, high);
-}
-
 static INLINE __m128i dc_sum_64(const uint8_t *ref) {
   __m128i x0 = _mm_load_si128((__m128i const *)ref);
   __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
@@ -142,7 +123,7 @@
 
 void aom_dc_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left) {
-  const __m128i sum_left = dc_sum_16(left);
+  const __m128i sum_left = dc_sum_16_sse2(left);
   __m128i sum_above = dc_sum_4(above);
   sum_above = _mm_add_epi16(sum_left, sum_above);
 
@@ -171,7 +152,7 @@
 
 void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left) {
-  const __m128i sum_left = dc_sum_16(left);
+  const __m128i sum_left = dc_sum_16_sse2(left);
   __m128i sum_above = dc_sum_8(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
@@ -184,7 +165,7 @@
 
 void aom_dc_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left) {
-  const __m128i sum_left = dc_sum_32(left);
+  const __m128i sum_left = dc_sum_32_sse2(left);
   __m128i sum_above = dc_sum_8(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
@@ -198,7 +179,7 @@
 void aom_dc_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left) {
   const __m128i sum_left = dc_sum_4(left);
-  __m128i sum_above = dc_sum_16(above);
+  __m128i sum_above = dc_sum_16_sse2(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
   uint32_t sum = _mm_cvtsi128_si32(sum_above);
@@ -211,7 +192,7 @@
 void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left) {
   const __m128i sum_left = dc_sum_8(left);
-  __m128i sum_above = dc_sum_16(above);
+  __m128i sum_above = dc_sum_16_sse2(above);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
   uint32_t sum = _mm_cvtsi128_si32(sum_above);
@@ -223,8 +204,8 @@
 
 void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
-  const __m128i sum_left = dc_sum_32(left);
-  __m128i sum_above = dc_sum_16(above);
+  const __m128i sum_left = dc_sum_32_sse2(left);
+  __m128i sum_above = dc_sum_16_sse2(above);
   sum_above = _mm_add_epi16(sum_left, sum_above);
 
   uint32_t sum = _mm_cvtsi128_si32(sum_above);
@@ -237,7 +218,7 @@
 void aom_dc_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
   const __m128i sum_left = dc_sum_64(left);
-  __m128i sum_above = dc_sum_16(above);
+  __m128i sum_above = dc_sum_16_sse2(above);
   sum_above = _mm_add_epi16(sum_left, sum_above);
 
   uint32_t sum = _mm_cvtsi128_si32(sum_above);
@@ -249,7 +230,7 @@
 
 void aom_dc_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left) {
-  __m128i sum_above = dc_sum_32(above);
+  __m128i sum_above = dc_sum_32_sse2(above);
   const __m128i sum_left = dc_sum_8(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
@@ -262,8 +243,8 @@
 
 void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
-  __m128i sum_above = dc_sum_32(above);
-  const __m128i sum_left = dc_sum_16(left);
+  __m128i sum_above = dc_sum_32_sse2(above);
+  const __m128i sum_left = dc_sum_16_sse2(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
   uint32_t sum = _mm_cvtsi128_si32(sum_above);
@@ -275,7 +256,7 @@
 
 void aom_dc_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
-  __m128i sum_above = dc_sum_32(above);
+  __m128i sum_above = dc_sum_32_sse2(above);
   const __m128i sum_left = dc_sum_64(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
@@ -302,7 +283,7 @@
 void aom_dc_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
   __m128i sum_above = dc_sum_64(above);
-  const __m128i sum_left = dc_sum_32(left);
+  const __m128i sum_left = dc_sum_32_sse2(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
   uint32_t sum = _mm_cvtsi128_si32(sum_above);
@@ -315,7 +296,7 @@
 void aom_dc_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
   __m128i sum_above = dc_sum_64(above);
-  const __m128i sum_left = dc_sum_16(left);
+  const __m128i sum_left = dc_sum_16_sse2(left);
   sum_above = _mm_add_epi16(sum_above, sum_left);
 
   uint32_t sum = _mm_cvtsi128_si32(sum_above);
@@ -395,7 +376,7 @@
 void aom_dc_top_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
                                     const uint8_t *above, const uint8_t *left) {
   (void)left;
-  __m128i sum_above = dc_sum_16(above);
+  __m128i sum_above = dc_sum_16_sse2(above);
   const __m128i eight = _mm_set1_epi16((uint16_t)8);
   sum_above = _mm_add_epi16(sum_above, eight);
   sum_above = _mm_srai_epi16(sum_above, 4);
@@ -408,7 +389,7 @@
 void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
                                     const uint8_t *above, const uint8_t *left) {
   (void)left;
-  __m128i sum_above = dc_sum_16(above);
+  __m128i sum_above = dc_sum_16_sse2(above);
   const __m128i eight = _mm_set1_epi16((uint16_t)8);
   sum_above = _mm_add_epi16(sum_above, eight);
   sum_above = _mm_srai_epi16(sum_above, 4);
@@ -422,7 +403,7 @@
                                      const uint8_t *above,
                                      const uint8_t *left) {
   (void)left;
-  __m128i sum_above = dc_sum_16(above);
+  __m128i sum_above = dc_sum_16_sse2(above);
   const __m128i eight = _mm_set1_epi16((uint16_t)8);
   sum_above = _mm_add_epi16(sum_above, eight);
   sum_above = _mm_srai_epi16(sum_above, 4);
@@ -436,7 +417,7 @@
                                      const uint8_t *above,
                                      const uint8_t *left) {
   (void)left;
-  __m128i sum_above = dc_sum_16(above);
+  __m128i sum_above = dc_sum_16_sse2(above);
   const __m128i eight = _mm_set1_epi16((uint16_t)8);
   sum_above = _mm_add_epi16(sum_above, eight);
   sum_above = _mm_srai_epi16(sum_above, 4);
@@ -449,7 +430,7 @@
 void aom_dc_top_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
                                     const uint8_t *above, const uint8_t *left) {
   (void)left;
-  __m128i sum_above = dc_sum_32(above);
+  __m128i sum_above = dc_sum_32_sse2(above);
   const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
   sum_above = _mm_add_epi16(sum_above, sixteen);
   sum_above = _mm_srai_epi16(sum_above, 5);
@@ -463,7 +444,7 @@
                                      const uint8_t *above,
                                      const uint8_t *left) {
   (void)left;
-  __m128i sum_above = dc_sum_32(above);
+  __m128i sum_above = dc_sum_32_sse2(above);
   const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
   sum_above = _mm_add_epi16(sum_above, sixteen);
   sum_above = _mm_srai_epi16(sum_above, 5);
@@ -477,7 +458,7 @@
                                      const uint8_t *above,
                                      const uint8_t *left) {
   (void)left;
-  __m128i sum_above = dc_sum_32(above);
+  __m128i sum_above = dc_sum_32_sse2(above);
   const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
   sum_above = _mm_add_epi16(sum_above, sixteen);
   sum_above = _mm_srai_epi16(sum_above, 5);
@@ -550,7 +531,7 @@
                                      const uint8_t *above,
                                      const uint8_t *left) {
   (void)above;
-  __m128i sum_left = dc_sum_16(left);
+  __m128i sum_left = dc_sum_16_sse2(left);
   const __m128i eight = _mm_set1_epi16((uint16_t)8);
   sum_left = _mm_add_epi16(sum_left, eight);
   sum_left = _mm_srai_epi16(sum_left, 4);
@@ -577,7 +558,7 @@
                                      const uint8_t *above,
                                      const uint8_t *left) {
   (void)above;
-  __m128i sum_left = dc_sum_16(left);
+  __m128i sum_left = dc_sum_16_sse2(left);
   const __m128i eight = _mm_set1_epi16((uint16_t)8);
   sum_left = _mm_add_epi16(sum_left, eight);
   sum_left = _mm_srai_epi16(sum_left, 4);
@@ -590,7 +571,7 @@
                                      const uint8_t *above,
                                      const uint8_t *left) {
   (void)above;
-  __m128i sum_left = dc_sum_32(left);
+  __m128i sum_left = dc_sum_32_sse2(left);
   const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
   sum_left = _mm_add_epi16(sum_left, sixteen);
   sum_left = _mm_srai_epi16(sum_left, 5);
@@ -631,7 +612,7 @@
                                       const uint8_t *above,
                                       const uint8_t *left) {
   (void)above;
-  __m128i sum_left = dc_sum_32(left);
+  __m128i sum_left = dc_sum_32_sse2(left);
   const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
   sum_left = _mm_add_epi16(sum_left, sixteen);
   sum_left = _mm_srai_epi16(sum_left, 5);
@@ -673,7 +654,7 @@
                                       const uint8_t *above,
                                       const uint8_t *left) {
   (void)above;
-  __m128i sum_left = dc_sum_16(left);
+  __m128i sum_left = dc_sum_16_sse2(left);
   const __m128i eight = _mm_set1_epi16((uint16_t)8);
   sum_left = _mm_add_epi16(sum_left, eight);
   sum_left = _mm_srai_epi16(sum_left, 4);
@@ -715,7 +696,7 @@
                                       const uint8_t *above,
                                       const uint8_t *left) {
   (void)above;
-  __m128i sum_left = dc_sum_32(left);
+  __m128i sum_left = dc_sum_32_sse2(left);
   const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
   sum_left = _mm_add_epi16(sum_left, sixteen);
   sum_left = _mm_srai_epi16(sum_left, 5);
@@ -729,7 +710,7 @@
                                       const uint8_t *above,
                                       const uint8_t *left) {
   (void)above;
-  __m128i sum_left = dc_sum_16(left);
+  __m128i sum_left = dc_sum_16_sse2(left);
   const __m128i eight = _mm_set1_epi16((uint16_t)8);
   sum_left = _mm_add_epi16(sum_left, eight);
   sum_left = _mm_srai_epi16(sum_left, 4);
diff --git a/aom_dsp/x86/intrapred_x86.h b/aom_dsp/x86/intrapred_x86.h
new file mode 100644
index 0000000..b13f575
--- /dev/null
+++ b/aom_dsp/x86/intrapred_x86.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AOM_DSP_X86_INTRAPRED_X86_H_
+#define AOM_AOM_DSP_X86_INTRAPRED_X86_H_
+
+#include <emmintrin.h>  // SSE2
+#include "aom/aom_integer.h"
+#include "config/aom_config.h"
+
+static INLINE __m128i dc_sum_16_sse2(const uint8_t *ref) {
+  __m128i x = _mm_load_si128((__m128i const *)ref);
+  const __m128i zero = _mm_setzero_si128();
+  x = _mm_sad_epu8(x, zero);
+  const __m128i high = _mm_unpackhi_epi64(x, x);
+  return _mm_add_epi16(x, high);
+}
+
+static INLINE __m128i dc_sum_32_sse2(const uint8_t *ref) {
+  __m128i x0 = _mm_load_si128((__m128i const *)ref);
+  __m128i x1 = _mm_load_si128((__m128i const *)(ref + 16));
+  const __m128i zero = _mm_setzero_si128();
+  x0 = _mm_sad_epu8(x0, zero);
+  x1 = _mm_sad_epu8(x1, zero);
+  x0 = _mm_add_epi16(x0, x1);
+  const __m128i high = _mm_unpackhi_epi64(x0, x0);
+  return _mm_add_epi16(x0, high);
+}
+
+#endif  // AOM_AOM_DSP_X86_INTRAPRED_X86_H_