AVX2: Force inlining of aom_sadMxNx4d_avx2

About a 0.3% speedup when compiled with the PIC flag.

Change-Id: I7b251c13aff41ada27223a2f17a92c4cc561cecf
diff --git a/aom_dsp/x86/sad4d_avx2.c b/aom_dsp/x86/sad4d_avx2.c
index 1abeb4c..6ff8155 100644
--- a/aom_dsp/x86/sad4d_avx2.c
+++ b/aom_dsp/x86/sad4d_avx2.c
@@ -14,9 +14,9 @@
 
 #include "aom/aom_integer.h"
 
-void aom_sadMxNx4d_avx2(int M, int N, const uint8_t *src, int src_stride,
-                        const uint8_t *const ref[4], int ref_stride,
-                        uint32_t res[4]) {
+static AOM_FORCE_INLINE void aom_sadMxNx4d_avx2(
+    int M, int N, const uint8_t *src, int src_stride,
+    const uint8_t *const ref[4], int ref_stride, uint32_t res[4]) {
   __m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
   __m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
   int i, j;