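Improve SSE2 implementation of v64_abs_s8 and v128_abs_s8

In the non-SSSE3 fallback, replace the negate/compare/blend sequence
(sub, cmplt, andnot, and, or) with a sign-mask add-and-xor:
sign = (a < 0); result = (a + sign) ^ sign. The per-byte result is
unchanged, but fewer instructions are needed.

Change-Id: I5243432106c2456f1220adb9d8f24ae5e4249748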
diff --git a/aom_dsp/simd/v128_intrinsics_x86.h b/aom_dsp/simd/v128_intrinsics_x86.h
index ce078cb..3043269 100644
--- a/aom_dsp/simd/v128_intrinsics_x86.h
+++ b/aom_dsp/simd/v128_intrinsics_x86.h
@@ -124,9 +124,8 @@
#if defined(__SSSE3__)
return _mm_abs_epi8(a);
#else
- v128 t = _mm_sub_epi8(_mm_setzero_si128(), a);
- v128 mask = _mm_cmplt_epi8(t, a);
- return _mm_or_si128(_mm_andnot_si128(mask, t), _mm_and_si128(mask, a));
+ v128 sign = _mm_cmplt_epi8(a, _mm_setzero_si128());
+ return _mm_xor_si128(sign, _mm_add_epi8(a, sign));
#endif
}
diff --git a/aom_dsp/simd/v64_intrinsics_x86.h b/aom_dsp/simd/v64_intrinsics_x86.h
index b0296fd..6dec36e 100644
--- a/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/aom_dsp/simd/v64_intrinsics_x86.h
@@ -142,9 +142,8 @@
#if defined(__SSSE3__)
return _mm_abs_epi8(a);
#else
- v64 t = _mm_sub_epi8(_mm_setzero_si128(), a);
- v64 mask = _mm_cmplt_epi8(t, a);
- return _mm_or_si128(_mm_andnot_si128(mask, t), _mm_and_si128(mask, a));
+ v64 sign = _mm_cmplt_epi8(a, _mm_setzero_si128());
+ return _mm_xor_si128(sign, _mm_add_epi8(a, sign));
#endif
}