Add INLINE for compute_jnt_comp_avg. Some compilers, such as MSVC 2013, do not inline this small function or its highbd version. For the encoder, adding INLINE gives about a 1.1% speedup, as shown by encoding 10 frames of foreman_cif. Change-Id: I8aab0819ae8660eb8b849c70082c678dfbf581fb

commit: 2a21b102d8c48f75a6ae3aa875c0b6f738da4bc0 [log] [tgz]
author: Peng Bin <binpengsmail@gmail.com> Mon Mar 26 13:20:11 2018 +0800
committer: Bin Peng <binpengsmail@gmail.com> Tue Mar 27 02:04:58 2018 +0000
tree: 208feaadec0891af75a3d0098b4cd0cd2f7b21cb
parent: 97095fb7f61d161e60bba8c20f01e49c2427a6af [diff]
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index ceedd0f..84ffa88 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c

@@ -678,9 +678,11 @@
   }
 }
 
-static void highbd_compute_jnt_comp_avg(__m128i *p0, __m128i *p1,
-                                        const __m128i *w0, const __m128i *w1,
-                                        const __m128i *r, void *const result) {
+static INLINE void highbd_compute_jnt_comp_avg(__m128i *p0, __m128i *p1,
+                                               const __m128i *w0,
+                                               const __m128i *w1,
+                                               const __m128i *r,
+                                               void *const result) {
   __m128i mult0 = _mm_mullo_epi16(*p0, *w0);
   __m128i mult1 = _mm_mullo_epi16(*p1, *w1);
   __m128i sum = _mm_add_epi16(mult0, mult1);

diff --git a/aom_dsp/x86/jnt_variance_ssse3.c b/aom_dsp/x86/jnt_variance_ssse3.c
index c731af0..79417ac 100644
--- a/aom_dsp/x86/jnt_variance_ssse3.c
+++ b/aom_dsp/x86/jnt_variance_ssse3.c

@@ -136,8 +136,9 @@
   }
 }
 
-static void compute_jnt_comp_avg(__m128i *p0, __m128i *p1, const __m128i *w,
-                                 const __m128i *r, void *const result) {
+static INLINE void compute_jnt_comp_avg(__m128i *p0, __m128i *p1,
+                                        const __m128i *w, const __m128i *r,
+                                        void *const result) {
   __m128i p_lo = _mm_unpacklo_epi8(*p0, *p1);
   __m128i mult_lo = _mm_maddubs_epi16(p_lo, *w);
   __m128i round_lo = _mm_add_epi16(mult_lo, *r);
commit	2a21b102d8c48f75a6ae3aa875c0b6f738da4bc0	[log] [tgz]
author	Peng Bin <binpengsmail@gmail.com>	Mon Mar 26 13:20:11 2018 +0800
committer	Bin Peng <binpengsmail@gmail.com>	Tue Mar 27 02:04:58 2018 +0000
tree	208feaadec0891af75a3d0098b4cd0cd2f7b21cb
parent	97095fb7f61d161e60bba8c20f01e49c2427a6af [diff]