AVX2: Force inlining of sadMxNx4d
About a 0.3% speedup when compiled with the PIC flag.
Change-Id: I7b251c13aff41ada27223a2f17a92c4cc561cecf
diff --git a/aom_dsp/x86/sad4d_avx2.c b/aom_dsp/x86/sad4d_avx2.c
index 1abeb4c..6ff8155 100644
--- a/aom_dsp/x86/sad4d_avx2.c
+++ b/aom_dsp/x86/sad4d_avx2.c
@@ -14,9 +14,9 @@
#include "aom/aom_integer.h"
-void aom_sadMxNx4d_avx2(int M, int N, const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
+static AOM_FORCE_INLINE void aom_sadMxNx4d_avx2(
+ int M, int N, const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride, uint32_t res[4]) {
__m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
__m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
int i, j;