aom_dsp: remove x86inc.asm distinction

Manually cherry-picked from libvpx/master:
1b833d63d9c82270e4ea588541d14e9111c64c79
a4f3751be5f012d66011ddc1c5f12bd12734a1d3
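With CONFIG_USE_X86INC gone, the x86inc.asm-based functions are listed in
aom_dsp_rtcd_defs.pl like any other specialization, e.g.
"specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;".  As a rough
sketch only (symbol names and the HAS_SSE2 value are assumptions based on
the usual generated aom_dsp_rtcd.h / aom_ports layout, not taken from this
patch), the dispatch produced for such a line looks roughly like:

    #include <stddef.h>   /* ptrdiff_t */
    #include <stdint.h>   /* uint8_t */

    #define HAS_SSE2 0x04  /* assumed, as in aom_ports/x86.h */

    /* One symbol per specialization, plus the plain C fallback. */
    void aom_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride,
                               const uint8_t *above, const uint8_t *left);
    void aom_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride,
                                  const uint8_t *above, const uint8_t *left);

    /* Function pointer the rest of the code calls through. */
    void (*aom_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left);

    static void setup_rtcd_internal(int flags) {
      /* Start from the C version and upgrade when the CPU has SSE2. */
      aom_h_predictor_4x4 = aom_h_predictor_4x4_c;
      if (flags & HAS_SSE2) aom_h_predictor_4x4 = aom_h_predictor_4x4_sse2;
    }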

Change-Id: I3b82e54a3173ac1458a13f33fd36094fec066f1c
diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk
index c04d955..896b729 100644
--- a/aom_dsp/aom_dsp.mk
+++ b/aom_dsp/aom_dsp.mk
@@ -65,18 +65,14 @@
DSP_SRCS-yes += daalaboolwriter.h
endif
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/aom_subpixel_8t_ssse3.asm
-endif # CONFIG_USE_X86INC
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/highbd_intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm
-endif # CONFIG_USE_X86INC
endif # CONFIG_AOM_HIGHBITDEPTH
DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM)
@@ -116,9 +112,7 @@
DSP_SRCS-$(HAVE_SSE2) += x86/aom_high_subpixel_8t_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/aom_high_subpixel_bilinear_sse2.asm
endif
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/aom_convolve_copy_sse2.asm
-endif
ifeq ($(HAVE_NEON_ASM),yes)
DSP_SRCS-yes += arm/aom_convolve_copy_neon_asm$(ASM)
@@ -209,10 +203,8 @@
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h
ifeq ($(ARCH_X86_64),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm
endif
-endif
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h
DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c
@@ -227,12 +219,10 @@
DSP_SRCS-yes += inv_txfm.c
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.h
DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.c
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/inv_wht_sse2.asm
ifeq ($(ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/inv_txfm_ssse3_x86_64.asm
endif # ARCH_X86_64
-endif # CONFIG_USE_X86INC
ifeq ($(HAVE_NEON_ASM),yes)
DSP_SRCS-yes += arm/save_reg_neon$(ASM)
@@ -284,11 +274,9 @@
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
endif
ifeq ($(ARCH_X86_64),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm
endif
-endif
# avg
DSP_SRCS-yes += avg.c
@@ -296,10 +284,8 @@
DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c
ifeq ($(ARCH_X86_64),yes)
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm
endif
-endif
endif # CONFIG_AV1_ENCODER
@@ -321,7 +307,6 @@
DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE) += x86/sad_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm
@@ -332,7 +317,6 @@
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm
endif # CONFIG_AOM_HIGHBITDEPTH
-endif # CONFIG_USE_X86INC
endif # CONFIG_ENCODERS
@@ -363,17 +347,13 @@
DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm
endif # ARCH_X86_64
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/subpel_variance_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3
-endif # CONFIG_USE_X86INC
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm
-ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_subpel_variance_impl_sse2.asm
-endif # CONFIG_USE_X86INC
endif # CONFIG_AOM_HIGHBITDEPTH
ifeq ($(CONFIG_MOTION_VAR),yes)
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 027c9fa..fc2535d 100644
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -11,29 +11,6 @@
}
forward_decls qw/aom_dsp_forward_decls/;
-# x86inc.asm had specific constraints. break it out so it's easy to disable.
-# zero all the variables to avoid tricky else conditions.
-$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
- $avx2_x86inc = '';
-$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
- $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';
-if (aom_config("CONFIG_USE_X86INC") eq "yes") {
- $mmx_x86inc = 'mmx';
- $sse_x86inc = 'sse';
- $sse2_x86inc = 'sse2';
- $ssse3_x86inc = 'ssse3';
- $avx_x86inc = 'avx';
- $avx2_x86inc = 'avx2';
- if ($opts{arch} eq "x86_64") {
- $mmx_x86_64_x86inc = 'mmx';
- $sse_x86_64_x86inc = 'sse';
- $sse2_x86_64_x86inc = 'sse2';
- $ssse3_x86_64_x86inc = 'ssse3';
- $avx_x86_64_x86inc = 'avx';
- $avx2_x86_64_x86inc = 'avx2';
- }
-}
-
# optimizations which depend on multiple features
$avx2_ssse3 = '';
if ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes")) {
@@ -126,7 +103,7 @@
specialize qw/aom_d63f_predictor_4x4/;
add_proto qw/void aom_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_h_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
add_proto qw/void aom_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_he_predictor_4x4/;
@@ -138,28 +115,28 @@
specialize qw/aom_d135_predictor_4x4 neon/;
add_proto qw/void aom_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_d153_predictor_4x4/, "$ssse3_x86inc";
+specialize qw/aom_d153_predictor_4x4 ssse3/;
add_proto qw/void aom_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_v_predictor_4x4 neon msa/, "$sse2_x86inc";
+specialize qw/aom_v_predictor_4x4 neon msa sse2/;
add_proto qw/void aom_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_ve_predictor_4x4/;
add_proto qw/void aom_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_tm_predictor_4x4 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/aom_tm_predictor_4x4 neon dspr2 msa sse2/;
add_proto qw/void aom_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_predictor_4x4 dspr2 msa neon/, "$sse2_x86inc";
+specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
add_proto qw/void aom_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_top_predictor_4x4 msa neon/, "$sse2_x86inc";
+specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
add_proto qw/void aom_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_left_predictor_4x4 msa neon/, "$sse2_x86inc";
+specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
add_proto qw/void aom_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_128_predictor_4x4 msa neon/, "$sse2_x86inc";
+specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;
add_proto qw/void aom_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_d207e_predictor_8x8/;
@@ -171,7 +148,7 @@
specialize qw/aom_d63e_predictor_8x8/;
add_proto qw/void aom_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_h_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
add_proto qw/void aom_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_d117_predictor_8x8/;
@@ -180,25 +157,25 @@
specialize qw/aom_d135_predictor_8x8/;
add_proto qw/void aom_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_d153_predictor_8x8/, "$ssse3_x86inc";
+specialize qw/aom_d153_predictor_8x8 ssse3/;
add_proto qw/void aom_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_v_predictor_8x8 neon msa/, "$sse2_x86inc";
+specialize qw/aom_v_predictor_8x8 neon msa sse2/;
add_proto qw/void aom_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/aom_tm_predictor_8x8 neon dspr2 msa sse2/;
add_proto qw/void aom_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_predictor_8x8 dspr2 neon msa/, "$sse2_x86inc";
+specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
add_proto qw/void aom_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_top_predictor_8x8 neon msa/, "$sse2_x86inc";
+specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
add_proto qw/void aom_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_left_predictor_8x8 neon msa/, "$sse2_x86inc";
+specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
add_proto qw/void aom_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_128_predictor_8x8 neon msa/, "$sse2_x86inc";
+specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
add_proto qw/void aom_d207e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_d207e_predictor_16x16/;
@@ -210,7 +187,7 @@
specialize qw/aom_d63e_predictor_16x16/;
add_proto qw/void aom_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_h_predictor_16x16 neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
add_proto qw/void aom_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_d117_predictor_16x16/;
@@ -219,25 +196,25 @@
specialize qw/aom_d135_predictor_16x16/;
add_proto qw/void aom_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_d153_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/aom_d153_predictor_16x16 ssse3/;
add_proto qw/void aom_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_v_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/aom_v_predictor_16x16 neon msa sse2/;
add_proto qw/void aom_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/aom_tm_predictor_16x16 neon msa sse2/;
add_proto qw/void aom_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
+specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
add_proto qw/void aom_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
add_proto qw/void aom_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
add_proto qw/void aom_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
+specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
add_proto qw/void aom_d207e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_d207e_predictor_32x32/;
@@ -249,7 +226,7 @@
specialize qw/aom_d63e_predictor_32x32/;
add_proto qw/void aom_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_h_predictor_32x32 neon msa/, "$sse2_x86inc";
+specialize qw/aom_h_predictor_32x32 neon msa sse2/;
add_proto qw/void aom_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/aom_d117_predictor_32x32/;
@@ -258,25 +235,25 @@
specialize qw/aom_d135_predictor_32x32/;
add_proto qw/void aom_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_d153_predictor_32x32/, "$ssse3_x86inc";
+specialize qw/aom_d153_predictor_32x32 ssse3/;
add_proto qw/void aom_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_v_predictor_32x32 neon msa/, "$sse2_x86inc";
+specialize qw/aom_v_predictor_32x32 neon msa sse2/;
add_proto qw/void aom_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_tm_predictor_32x32 neon msa/, "$sse2_x86inc";
+specialize qw/aom_tm_predictor_32x32 neon msa sse2/;
add_proto qw/void aom_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
+specialize qw/aom_dc_predictor_32x32 msa neon sse2/;
add_proto qw/void aom_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
+specialize qw/aom_dc_top_predictor_32x32 msa neon sse2/;
add_proto qw/void aom_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
+specialize qw/aom_dc_left_predictor_32x32 msa neon sse2/;
add_proto qw/void aom_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/aom_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
+specialize qw/aom_dc_128_predictor_32x32 msa neon sse2/;
# High bitdepth functions
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
@@ -302,13 +279,13 @@
specialize qw/aom_highbd_d153_predictor_4x4/;
add_proto qw/void aom_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_v_predictor_4x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_v_predictor_4x4 sse2/;
add_proto qw/void aom_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_tm_predictor_4x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_tm_predictor_4x4 sse2/;
add_proto qw/void aom_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_dc_predictor_4x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_dc_predictor_4x4 sse2/;
add_proto qw/void aom_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/aom_highbd_dc_top_predictor_4x4/;
@@ -341,13 +318,13 @@
specialize qw/aom_highbd_d153_predictor_8x8/;
add_proto qw/void aom_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_v_predictor_8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_v_predictor_8x8 sse2/;
add_proto qw/void aom_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_tm_predictor_8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_tm_predictor_8x8 sse2/;
add_proto qw/void aom_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_dc_predictor_8x8/, "$sse2_x86inc";;
+ specialize qw/aom_highbd_dc_predictor_8x8 sse2/;
add_proto qw/void aom_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/aom_highbd_dc_top_predictor_8x8/;
@@ -380,13 +357,13 @@
specialize qw/aom_highbd_d153_predictor_16x16/;
add_proto qw/void aom_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_v_predictor_16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_v_predictor_16x16 sse2/;
add_proto qw/void aom_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_tm_predictor_16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_tm_predictor_16x16 sse2/;
add_proto qw/void aom_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_dc_predictor_16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_dc_predictor_16x16 sse2/;
add_proto qw/void aom_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/aom_highbd_dc_top_predictor_16x16/;
@@ -419,13 +396,13 @@
specialize qw/aom_highbd_d153_predictor_32x32/;
add_proto qw/void aom_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_v_predictor_32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_v_predictor_32x32 sse2/;
add_proto qw/void aom_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_tm_predictor_32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_tm_predictor_32x32 sse2/;
add_proto qw/void aom_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/aom_highbd_dc_predictor_32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_dc_predictor_32x32 sse2/;
add_proto qw/void aom_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/aom_highbd_dc_top_predictor_32x32/;
@@ -441,10 +418,10 @@
# Sub Pixel Filters
#
add_proto qw/void aom_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/aom_convolve_copy neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/aom_convolve_copy neon dspr2 msa sse2/;
add_proto qw/void aom_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/aom_convolve_avg neon dspr2 msa/, "$sse2_x86inc";
+specialize qw/aom_convolve_avg neon dspr2 msa sse2/;
add_proto qw/void aom_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
specialize qw/aom_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
@@ -487,10 +464,10 @@
# Sub Pixel Filters
#
add_proto qw/void aom_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/aom_highbd_convolve_copy/, "$sse2_x86inc";
+ specialize qw/aom_highbd_convolve_copy sse2/;
add_proto qw/void aom_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/aom_highbd_convolve_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_convolve_avg sse2/;
add_proto qw/void aom_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/aom_highbd_convolve8/, "$sse2_x86_64";
@@ -671,7 +648,7 @@
specialize qw/aom_fdct4x4_1 sse2/;
add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
add_proto qw/void aom_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/aom_fdct8x8_1 sse2 neon msa/;
@@ -703,7 +680,7 @@
specialize qw/aom_iwht4x4_1_add/;
add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_iwht4x4_16_add/, "$sse2_x86inc";
+ specialize qw/aom_iwht4x4_16_add sse2/;
add_proto qw/void aom_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct4x4_1_add/;
@@ -789,10 +766,10 @@
specialize qw/aom_idct4x4_1_add sse2/;
add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct8x8_64_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct8x8_64_add sse2/, "$ssse3_x86_64";
add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct8x8_12_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct8x8_12_add sse2/, "$ssse3_x86_64";
add_proto qw/void aom_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/aom_idct8x8_1_add sse2/;
@@ -807,15 +784,15 @@
specialize qw/aom_idct16x16_1_add sse2/;
add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_1024_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct32x32_1024_add sse2/, "$ssse3_x86_64";
add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_135_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct32x32_135_add sse2/, "$ssse3_x86_64";
# Need to add 135 eob idct32x32 implementations.
$aom_idct32x32_135_add_sse2=aom_idct32x32_1024_add_sse2;
add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_34_add sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct32x32_34_add sse2/, "$ssse3_x86_64";
add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/aom_idct32x32_1_add sse2/;
@@ -890,10 +867,10 @@
specialize qw/aom_idct8x8_1_add sse2 neon dspr2 msa/;
add_proto qw/void aom_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
add_proto qw/void aom_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
add_proto qw/void aom_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/aom_idct16x16_1_add sse2 neon dspr2 msa/;
@@ -905,10 +882,10 @@
specialize qw/aom_idct16x16_10_add sse2 neon dspr2 msa/;
add_proto qw/void aom_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct32x32_1024_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
add_proto qw/void aom_idct32x32_135_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct32x32_135_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
# Need to add 135 eob idct32x32 implementations.
$aom_idct32x32_135_add_sse2=aom_idct32x32_1024_add_sse2;
$aom_idct32x32_135_add_neon=aom_idct32x32_1024_add_neon;
@@ -916,7 +893,7 @@
$aom_idct32x32_135_add_msa=aom_idct32x32_1024_add_msa;
add_proto qw/void aom_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_idct32x32_34_add sse2 neon_asm dspr2 msa/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_idct32x32_34_add sse2 neon_asm dspr2 msa/, "$ssse3_x86_64";
# Need to add 34 eob idct32x32 neon implementation.
$aom_idct32x32_34_add_neon_asm=aom_idct32x32_1024_add_neon;
@@ -927,7 +904,7 @@
specialize qw/aom_iwht4x4_1_add msa/;
add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/aom_iwht4x4_16_add msa/, "$sse2_x86inc";
+ specialize qw/aom_iwht4x4_16_add msa sse2/;
} # CONFIG_EMULATE_HARDWARE
} # CONFIG_AOM_HIGHBITDEPTH
} # CONFIG_AV1
@@ -950,10 +927,10 @@
} else {
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
+ specialize qw/aom_quantize_b sse2/, "$ssse3_x86_64", "$avx_x86_64";
add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
+ specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64";
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
@@ -989,49 +966,49 @@
# Block subtraction
#
add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
-specialize qw/aom_subtract_block neon msa/, "$sse2_x86inc";
+specialize qw/aom_subtract_block neon msa sse2/;
#
# Single block SAD
#
add_proto qw/unsigned int aom_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad64x64 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad64x64 avx2 neon msa sse2/;
add_proto qw/unsigned int aom_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad64x32 avx2 msa/, "$sse2_x86inc";
+specialize qw/aom_sad64x32 avx2 msa sse2/;
add_proto qw/unsigned int aom_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad32x64 avx2 msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x64 avx2 msa sse2/;
add_proto qw/unsigned int aom_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad32x32 avx2 neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x32 avx2 neon msa sse2/;
add_proto qw/unsigned int aom_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad32x16 avx2 msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x16 avx2 msa sse2/;
add_proto qw/unsigned int aom_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad16x32 msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x32 msa sse2/;
add_proto qw/unsigned int aom_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad16x16 media neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x16 media neon msa sse2/;
add_proto qw/unsigned int aom_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad16x8 neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x8 neon msa sse2/;
add_proto qw/unsigned int aom_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad8x16 neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x16 neon msa sse2/;
add_proto qw/unsigned int aom_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad8x8 neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x8 neon msa sse2/;
add_proto qw/unsigned int aom_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad8x4 msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x4 msa sse2/;
add_proto qw/unsigned int aom_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad4x8 msa/, "$sse2_x86inc";
+specialize qw/aom_sad4x8 msa sse2/;
add_proto qw/unsigned int aom_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/aom_sad4x4 neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad4x4 neon msa sse2/;
#
# OBMC SAD
@@ -1091,7 +1068,7 @@
specialize qw/aom_minmax_8x8 sse2/;
add_proto qw/void aom_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
- specialize qw/aom_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
+ specialize qw/aom_hadamard_8x8 sse2/, "$ssse3_x86_64";
add_proto qw/void aom_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/aom_hadamard_16x16 sse2/;
@@ -1110,43 +1087,43 @@
} # CONFIG_AV1_ENCODER
add_proto qw/unsigned int aom_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad64x64_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/aom_sad64x64_avg avx2 msa sse2/;
add_proto qw/unsigned int aom_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad64x32_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/aom_sad64x32_avg avx2 msa sse2/;
add_proto qw/unsigned int aom_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad32x64_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x64_avg avx2 msa sse2/;
add_proto qw/unsigned int aom_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad32x32_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x32_avg avx2 msa sse2/;
add_proto qw/unsigned int aom_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad32x16_avg avx2 msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x16_avg avx2 msa sse2/;
add_proto qw/unsigned int aom_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad16x32_avg msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x32_avg msa sse2/;
add_proto qw/unsigned int aom_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad16x16_avg msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x16_avg msa sse2/;
add_proto qw/unsigned int aom_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad16x8_avg msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x8_avg msa sse2/;
add_proto qw/unsigned int aom_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad8x16_avg msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x16_avg msa sse2/;
add_proto qw/unsigned int aom_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad8x8_avg msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x8_avg msa sse2/;
add_proto qw/unsigned int aom_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad8x4_avg msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x4_avg msa sse2/;
add_proto qw/unsigned int aom_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad4x8_avg msa/, "$sse2_x86inc";
+specialize qw/aom_sad4x8_avg msa sse2/;
add_proto qw/unsigned int aom_sad4x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/aom_sad4x4_avg msa/, "$sse2_x86inc";
+specialize qw/aom_sad4x4_avg msa sse2/;
#
# Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally
@@ -1205,43 +1182,43 @@
# Multi-block SAD, comparing a reference to N independent blocks
#
add_proto qw/void aom_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad64x64x4d avx2 neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad64x64x4d avx2 neon msa sse2/;
add_proto qw/void aom_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad64x32x4d msa/, "$sse2_x86inc";
+specialize qw/aom_sad64x32x4d msa sse2/;
add_proto qw/void aom_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad32x64x4d msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x64x4d msa sse2/;
add_proto qw/void aom_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad32x32x4d avx2 neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x32x4d avx2 neon msa sse2/;
add_proto qw/void aom_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad32x16x4d msa/, "$sse2_x86inc";
+specialize qw/aom_sad32x16x4d msa sse2/;
add_proto qw/void aom_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad16x32x4d msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x32x4d msa sse2/;
add_proto qw/void aom_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad16x16x4d neon msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x16x4d neon msa sse2/;
add_proto qw/void aom_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad16x8x4d msa/, "$sse2_x86inc";
+specialize qw/aom_sad16x8x4d msa sse2/;
add_proto qw/void aom_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad8x16x4d msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x16x4d msa sse2/;
add_proto qw/void aom_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad8x8x4d msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x8x4d msa sse2/;
add_proto qw/void aom_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad8x4x4d msa/, "$sse2_x86inc";
+specialize qw/aom_sad8x4x4d msa sse2/;
add_proto qw/void aom_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad4x8x4d msa/, "$sse_x86inc";
+specialize qw/aom_sad4x8x4d msa sse2/;
add_proto qw/void aom_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
-specialize qw/aom_sad4x4x4d msa/, "$sse_x86inc";
+specialize qw/aom_sad4x4x4d msa sse2/;
#
# Structured Similarity (SSIM)
@@ -1265,37 +1242,37 @@
# Single block SAD
#
add_proto qw/unsigned int aom_highbd_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad64x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad64x64 sse2/;
add_proto qw/unsigned int aom_highbd_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad64x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad64x32 sse2/;
add_proto qw/unsigned int aom_highbd_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad32x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x64 sse2/;
add_proto qw/unsigned int aom_highbd_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x32 sse2/;
add_proto qw/unsigned int aom_highbd_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad32x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x16 sse2/;
add_proto qw/unsigned int aom_highbd_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad16x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x32 sse2/;
add_proto qw/unsigned int aom_highbd_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x16 sse2/;
add_proto qw/unsigned int aom_highbd_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad16x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x8 sse2/;
add_proto qw/unsigned int aom_highbd_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad8x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x16 sse2/;
add_proto qw/unsigned int aom_highbd_sad8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x8 sse2/;
add_proto qw/unsigned int aom_highbd_sad8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
- specialize qw/aom_highbd_sad8x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x4 sse2/;
add_proto qw/unsigned int aom_highbd_sad4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/aom_highbd_sad4x8/;
@@ -1314,37 +1291,37 @@
specialize qw/aom_highbd_minmax_8x8/;
add_proto qw/unsigned int aom_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad64x64_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad64x64_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad64x32_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad64x32_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad32x64_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x64_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad32x32_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x32_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad32x16_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x16_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad16x32_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x32_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad16x16_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x16_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad16x8_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x8_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad8x16_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x16_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad8x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad8x8_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x8_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad8x4_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
- specialize qw/aom_highbd_sad8x4_avg/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x4_avg sse2/;
add_proto qw/unsigned int aom_highbd_sad4x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/aom_highbd_sad4x8_avg/;
@@ -1409,43 +1386,43 @@
# Multi-block SAD, comparing a reference to N independent blocks
#
add_proto qw/void aom_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad64x64x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad64x64x4d sse2/;
add_proto qw/void aom_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad64x32x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad64x32x4d sse2/;
add_proto qw/void aom_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad32x64x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x64x4d sse2/;
add_proto qw/void aom_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad32x32x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x32x4d sse2/;
add_proto qw/void aom_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad32x16x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad32x16x4d sse2/;
add_proto qw/void aom_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad16x32x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x32x4d sse2/;
add_proto qw/void aom_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad16x16x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x16x4d sse2/;
add_proto qw/void aom_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad16x8x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad16x8x4d sse2/;
add_proto qw/void aom_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad8x16x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x16x4d sse2/;
add_proto qw/void aom_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad8x8x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x8x4d sse2/;
add_proto qw/void aom_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad8x4x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad8x4x4d sse2/;
add_proto qw/void aom_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad4x8x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad4x8x4d sse2/;
add_proto qw/void aom_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, uint32_t *sad_array";
- specialize qw/aom_highbd_sad4x4x4d/, "$sse2_x86inc";
+ specialize qw/aom_highbd_sad4x4x4d sse2/;
#
# Structured Similarity (SSIM)
@@ -1546,82 +1523,82 @@
# Subpixel Variance
#
add_proto qw/uint32_t aom_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance64x64 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance64x32 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance32x64 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance32x32 avx2 neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance32x16 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance16x32 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance16x16 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance16x16 media neon msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance16x8 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance8x16 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance8x8 media neon msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance8x8 media neon msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance8x4 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance4x8 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_sub_pixel_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_variance4x4 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance64x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance64x32 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x64 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance32x64 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance32x16 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x32 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x16 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x8 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x4 msa/, "$sse2_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance4x8 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance4x4 msa/, "$sse_x86inc", "$ssse3_x86inc";
+ specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
#
# Specialty Subpixel
@@ -1777,217 +1754,217 @@
# Subpixel Variance
#
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance64x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance64x64 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance64x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance64x32 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance32x64 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance32x32 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance32x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance32x16 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance16x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance8x8 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_12_sub_pixel_variance8x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_variance8x4 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance64x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance64x64 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance64x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance64x32 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance32x64 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance32x32 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance32x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance32x16 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance16x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance8x8 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_10_sub_pixel_variance8x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_variance8x4 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance64x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance64x64 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance64x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance64x32 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance32x64 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance32x32 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance32x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance32x16 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance16x32 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance16x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance8x8 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/aom_highbd_8_sub_pixel_variance8x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_variance8x4 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4 sse2/;
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4 sse2/;
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4/, "$sse2_x86inc";
+ specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4 sse2/;
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
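
The specialize lines above feed the generated aom_dsp_rtcd.h, so dropping the quoted "$sse2_x86inc"/"$ssse3_x86inc" variables in favour of bare sse2/ssse3 tokens simply makes the SIMD promotions unconditional again. A minimal sketch of the dispatch this produces for one of the affected functions, assuming the usual rtcd layout (the HAS_* flag values, the local prototype macro, and the setup function name are illustrative, not copied from the generated header; only the x86 promotions are shown):

    #include <stdint.h>

    /* Illustrative CPU-flag bits; the real values live in aom_ports/x86.h. */
    #define HAS_SSE2 (1 << 2)
    #define HAS_SSSE3 (1 << 4)

    /* Local convenience macro for the three prototypes of this sketch. */
    #define SUBPEL_AVG_VAR4X4_PROTO(suffix)                                     \
      uint32_t aom_sub_pixel_avg_variance4x4_##suffix(                          \
          const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset,  \
          const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                \
          const uint8_t *second_pred)

    SUBPEL_AVG_VAR4X4_PROTO(c);
    SUBPEL_AVG_VAR4X4_PROTO(sse2);
    SUBPEL_AVG_VAR4X4_PROTO(ssse3);

    /* Function pointer published by the generated header; it starts at the C
     * version and is promoted to the best extension the CPU reports. Before
     * this change the sse2/ssse3 promotions below were emitted only when
     * CONFIG_USE_X86INC was set. */
    uint32_t (*aom_sub_pixel_avg_variance4x4)(const uint8_t *, int, int, int,
                                              const uint8_t *, int, uint32_t *,
                                              const uint8_t *) =
        aom_sub_pixel_avg_variance4x4_c;

    static void setup_rtcd_internal(int flags) {
      aom_sub_pixel_avg_variance4x4 = aom_sub_pixel_avg_variance4x4_c;
      if (flags & HAS_SSE2)
        aom_sub_pixel_avg_variance4x4 = aom_sub_pixel_avg_variance4x4_sse2;
      if (flags & HAS_SSSE3)
        aom_sub_pixel_avg_variance4x4 = aom_sub_pixel_avg_variance4x4_ssse3;
    }
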
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index d2ebc34..38a6a1e 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c
@@ -250,7 +250,6 @@
return *sse;
}
-#if CONFIG_USE_X86INC
// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in
// highbd_subpel_variance_impl_sse2.asm
@@ -557,7 +556,6 @@
#undef FNS
#undef FN
-#endif // CONFIG_USE_X86INC
void aom_highbd_upsampled_pred_sse2(uint16_t *pred, int width, int height,
const uint8_t *ref8, const int ref_stride) {
diff --git a/aom_dsp/x86/sad4d_sse2.asm b/aom_dsp/x86/sad4d_sse2.asm
index d48f3fd..f27015f 100644
--- a/aom_dsp/x86/sad4d_sse2.asm
+++ b/aom_dsp/x86/sad4d_sse2.asm
@@ -23,33 +23,41 @@
movd m4, [ref2q+%3]
movd m7, [ref3q+%3]
movd m5, [ref4q+%3]
- punpckldq m0, [srcq +%4]
- punpckldq m6, [ref1q+%5]
- punpckldq m4, [ref2q+%5]
- punpckldq m7, [ref3q+%5]
- punpckldq m5, [ref4q+%5]
+ movd m1, [srcq +%4]
+ movd m2, [ref1q+%5]
+ punpckldq m0, m1
+ punpckldq m6, m2
+ movd m1, [ref2q+%5]
+ movd m2, [ref3q+%5]
+ movd m3, [ref4q+%5]
+ punpckldq m4, m1
+ punpckldq m7, m2
+ punpckldq m5, m3
+ movlhps m0, m0
+ movlhps m6, m4
+ movlhps m7, m5
psadbw m6, m0
- psadbw m4, m0
psadbw m7, m0
- psadbw m5, m0
- punpckldq m6, m4
- punpckldq m7, m5
%else
movd m1, [ref1q+%3]
+ movd m5, [ref1q+%5]
movd m2, [ref2q+%3]
+ movd m4, [ref2q+%5]
+ punpckldq m1, m5
+ punpckldq m2, m4
movd m3, [ref3q+%3]
+ movd m5, [ref3q+%5]
+ punpckldq m3, m5
movd m4, [ref4q+%3]
- punpckldq m0, [srcq +%4]
- punpckldq m1, [ref1q+%5]
- punpckldq m2, [ref2q+%5]
- punpckldq m3, [ref3q+%5]
- punpckldq m4, [ref4q+%5]
+ movd m5, [ref4q+%5]
+ punpckldq m4, m5
+ movd m5, [srcq +%4]
+ punpckldq m0, m5
+ movlhps m0, m0
+ movlhps m1, m2
+ movlhps m3, m4
psadbw m1, m0
- psadbw m2, m0
psadbw m3, m0
- psadbw m4, m0
- punpckldq m1, m2
- punpckldq m3, m4
paddd m6, m1
paddd m7, m3
%endif
@@ -170,10 +178,16 @@
PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6
%endmacro
+; PROCESS_128x2x4 first, off_{first,second}_{src,ref}, advance_at_end
+%macro PROCESS_128x2x4 5-6 0
+ PROCESS_64x2x4 %1, %2, %3, %2 + 64, %3 + 64
+ PROCESS_64x2x4 0, %4, %5, %4 + 64, %5 + 64, %6
+%endmacro
+
; void aom_sadNxNx4d_sse2(uint8_t *src, int src_stride,
; uint8_t *ref[4], int ref_stride,
; uint32_t res[4]);
-; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8
+; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4
%macro SADNXN4D 2
%if UNIX64
cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
@@ -195,7 +209,7 @@
%endrep
PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
-%if mmsize == 16
+%if %1 > 4
pslldq m5, 4
pslldq m7, 4
por m4, m5
@@ -210,8 +224,10 @@
RET
%else
movifnidn r4, r4mp
- movq [r4+0], m6
- movq [r4+8], m7
+ pshufd m6, m6, 0x08
+ pshufd m7, m7, 0x08
+ movq [r4+0], m6
+ movq [r4+8], m7
RET
%endif
%endmacro
@@ -228,7 +244,5 @@
SADNXN4D 8, 16
SADNXN4D 8, 8
SADNXN4D 8, 4
-
-INIT_MMX sse
SADNXN4D 4, 8
SADNXN4D 4, 4
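
With INIT_MMX sse gone, the 4x8 and 4x4 sizes now run through the same SSE2 body, so the macro packs two 4-byte rows into one xmm register instead of relying on MMX memory-operand punpckldq. A rough SSE2-intrinsics sketch of that packing for one pair of references (the helper names are made up for the illustration; the shipped code is the asm above):

    #include <emmintrin.h>
    #include <stdint.h>
    #include <string.h>

    static __m128i load_u32(const uint8_t *p) { /* movd from unaligned memory */
      int32_t v;
      memcpy(&v, p, sizeof(v));
      return _mm_cvtsi32_si128(v);
    }

    /* Two rows of the 4-wide source and of two references are packed into
     * single xmm registers so one psadbw yields both SADs; pshufd 0x08 then
     * gathers the two dword results into the low qword, matching the new
     * movq store in SADNXN4D. */
    static void sad4x2_two_refs_sse2(const uint8_t *src, int src_stride,
                                     const uint8_t *ref0, const uint8_t *ref1,
                                     int ref_stride, uint32_t res[2]) {
      __m128i s = _mm_unpacklo_epi32(load_u32(src), load_u32(src + src_stride));
      s = _mm_unpacklo_epi64(s, s);                      /* movlhps m0, m0 */

      __m128i r0 = _mm_unpacklo_epi32(load_u32(ref0),
                                      load_u32(ref0 + ref_stride));
      __m128i r1 = _mm_unpacklo_epi32(load_u32(ref1),
                                      load_u32(ref1 + ref_stride));
      __m128i r = _mm_unpacklo_epi64(r0, r1);            /* movlhps m1, m2 */

      __m128i sad = _mm_sad_epu8(r, s);      /* SADs land in dwords 0 and 2 */
      sad = _mm_shuffle_epi32(sad, 0x08);    /* pack them into the low qword */
      _mm_storel_epi64((__m128i *)res, sad);
    }
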
diff --git a/aom_dsp/x86/subpel_variance_sse2.asm b/aom_dsp/x86/subpel_variance_sse2.asm
index 0bfc63a..d3feb7e 100644
--- a/aom_dsp/x86/subpel_variance_sse2.asm
+++ b/aom_dsp/x86/subpel_variance_sse2.asm
@@ -60,8 +60,8 @@
paddd %6, %1
%endmacro
-%macro STORE_AND_RET 0
-%if mmsize == 16
+%macro STORE_AND_RET 1
+%if %1 > 4
; if H=64 and W=16, we have 8 words of each 2(1bit)x64(6bit)x9bit=16bit
; in m6, i.e. it _exactly_ fits in a signed word per word in the xmm reg.
; We have to sign-extend it before adding the words within the register
@@ -81,16 +81,16 @@
movd [r1], m7 ; store sse
paddd m6, m4
movd raxd, m6 ; store sum as return value
-%else ; mmsize == 8
- pshufw m4, m6, 0xe
- pshufw m3, m7, 0xe
+%else ; 4xh
+ pshuflw m4, m6, 0xe
+ pshuflw m3, m7, 0xe
paddw m6, m4
paddd m7, m3
pcmpgtw m5, m6 ; mask for 0 > x
mov r1, ssem ; r1 = unsigned int *sse
punpcklwd m6, m5 ; sign-extend m6 word->dword
movd [r1], m7 ; store sse
- pshufw m4, m6, 0xe
+ pshuflw m4, m6, 0xe
paddd m6, m4
movd raxd, m6 ; store sum as return value
%endif
@@ -199,6 +199,12 @@
%endif
%endif
+%if %1 == 4
+ %define movx movd
+%else
+ %define movx movh
+%endif
+
ASSERT %1 <= 16 ; m6 overflows if w > 16
pxor m6, m6 ; sum
pxor m7, m7 ; sse
@@ -231,6 +237,7 @@
%endif
punpckhbw m2, m0, m5
punpcklbw m0, m5
+
%if %2 == 0 ; !avg
punpckhbw m3, m1, m5
punpcklbw m1, m5
@@ -240,24 +247,37 @@
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
+ movx m0, [srcq]
%if %2 == 1 ; avg
-%if mmsize == 16
+%if %1 > 4
movhps m0, [srcq+src_strideq]
-%else ; mmsize == 8
- punpckldq m0, [srcq+src_strideq]
+%else ; 4xh
+ movx m1, [srcq+src_strideq]
+ punpckldq m0, m1
%endif
%else ; !avg
- movh m2, [srcq+src_strideq]
+ movx m2, [srcq+src_strideq]
%endif
- movh m1, [dstq]
- movh m3, [dstq+dst_strideq]
+
+ movx m1, [dstq]
+ movx m3, [dstq+dst_strideq]
+
%if %2 == 1 ; avg
+%if %1 > 4
pavgb m0, [secq]
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+%endif
punpcklbw m3, m5
punpcklbw m1, m5
+%if %1 > 4
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else ; 4xh
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%else ; !avg
punpcklbw m0, m5
punpcklbw m2, m5
@@ -274,10 +294,10 @@
%endif
dec block_height
jg .x_zero_y_zero_loop
- STORE_AND_RET
+ STORE_AND_RET %1
.x_zero_y_nonzero:
- cmp y_offsetd, 8
+ cmp y_offsetd, 4
jne .x_zero_y_nonhalf
; x_offset == 0 && y_offset == 0.5
@@ -299,37 +319,41 @@
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m2, [srcq+src_strideq]
+ movx m0, [srcq]
+ movx m2, [srcq+src_strideq]
%if %2 == 1 ; avg
-%if mmsize == 16
+%if %1 > 4
movhps m2, [srcq+src_strideq*2]
-%else ; mmsize == 8
-%if %1 == 4
- movh m1, [srcq+src_strideq*2]
+%else ; 4xh
+ movx m1, [srcq+src_strideq*2]
punpckldq m2, m1
-%else
- punpckldq m2, [srcq+src_strideq*2]
%endif
-%endif
- movh m1, [dstq]
-%if mmsize == 16
+ movx m1, [dstq]
+%if %1 > 4
movlhps m0, m2
-%else ; mmsize == 8
+%else ; 4xh
punpckldq m0, m2
%endif
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
pavgb m0, m2
punpcklbw m1, m5
+%if %1 > 4
pavgb m0, [secq]
punpcklbw m3, m5
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else ; 4xh
+ movh m4, [secq]
+ pavgb m0, m4
+ punpcklbw m3, m5
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%else ; !avg
- movh m4, [srcq+src_strideq*2]
- movh m1, [dstq]
+ movx m4, [srcq+src_strideq*2]
+ movx m1, [dstq]
pavgb m0, m2
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
pavgb m2, m4
punpcklbw m0, m5
punpcklbw m2, m5
@@ -346,7 +370,7 @@
%endif
dec block_height
jg .x_zero_y_half_loop
- STORE_AND_RET
+ STORE_AND_RET %1
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
@@ -354,7 +378,7 @@
lea bilin_filter, [bilin_filter_m]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+y_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
@@ -427,12 +451,12 @@
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m2, [srcq+src_strideq]
- movh m4, [srcq+src_strideq*2]
- movh m3, [dstq+dst_strideq]
+ movx m0, [srcq]
+ movx m2, [srcq+src_strideq]
+ movx m4, [srcq+src_strideq*2]
+ movx m3, [dstq+dst_strideq]
%if cpuflag(ssse3)
- movh m1, [dstq]
+ movx m1, [dstq]
punpcklbw m0, m2
punpcklbw m2, m4
pmaddubsw m0, filter_y_a
@@ -452,17 +476,27 @@
pmullw m4, filter_y_b
paddw m0, m1
paddw m2, filter_rnd
- movh m1, [dstq]
+ movx m1, [dstq]
paddw m2, m4
%endif
psraw m0, 4
psraw m2, 4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else ; 4xh
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
punpcklbw m1, m5
SUM_SSE m0, m1, m2, m3, m6, m7
@@ -478,10 +512,10 @@
%undef filter_y_a
%undef filter_y_b
%undef filter_rnd
- STORE_AND_RET
+ STORE_AND_RET %1
.x_nonzero:
- cmp x_offsetd, 8
+ cmp x_offsetd, 4
jne .x_nonhalf
; x_offset == 0.5
test y_offsetd, y_offsetd
@@ -506,30 +540,40 @@
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m4, [srcq+1]
+ movx m0, [srcq]
+ movx m4, [srcq+1]
%if %2 == 1 ; avg
-%if mmsize == 16
+%if %1 > 4
movhps m0, [srcq+src_strideq]
movhps m4, [srcq+src_strideq+1]
-%else ; mmsize == 8
- punpckldq m0, [srcq+src_strideq]
- punpckldq m4, [srcq+src_strideq+1]
+%else ; 4xh
+ movx m1, [srcq+src_strideq]
+ punpckldq m0, m1
+ movx m2, [srcq+src_strideq+1]
+ punpckldq m4, m2
%endif
- movh m1, [dstq]
- movh m3, [dstq+dst_strideq]
+ movx m1, [dstq]
+ movx m3, [dstq+dst_strideq]
pavgb m0, m4
punpcklbw m3, m5
+%if %1 > 4
pavgb m0, [secq]
punpcklbw m1, m5
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else ; 4xh
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m1, m5
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%else ; !avg
- movh m2, [srcq+src_strideq]
- movh m1, [dstq]
+ movx m2, [srcq+src_strideq]
+ movx m1, [dstq]
pavgb m0, m4
- movh m4, [srcq+src_strideq+1]
- movh m3, [dstq+dst_strideq]
+ movx m4, [srcq+src_strideq+1]
+ movx m3, [dstq+dst_strideq]
pavgb m2, m4
punpcklbw m0, m5
punpcklbw m2, m5
@@ -546,10 +590,10 @@
%endif
dec block_height
jg .x_half_y_zero_loop
- STORE_AND_RET
+ STORE_AND_RET %1
.x_half_y_nonzero:
- cmp y_offsetd, 8
+ cmp y_offsetd, 4
jne .x_half_y_nonhalf
; x_offset == 0.5 && y_offset == 0.5
@@ -581,53 +625,58 @@
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m3, [srcq+1]
+ movx m0, [srcq]
+ movx m3, [srcq+1]
add srcq, src_strideq
pavgb m0, m3
.x_half_y_half_loop:
- movh m2, [srcq]
- movh m3, [srcq+1]
+ movx m2, [srcq]
+ movx m3, [srcq+1]
%if %2 == 1 ; avg
-%if mmsize == 16
+%if %1 > 4
movhps m2, [srcq+src_strideq]
movhps m3, [srcq+src_strideq+1]
%else
-%if %1 == 4
- movh m1, [srcq+src_strideq]
+ movx m1, [srcq+src_strideq]
punpckldq m2, m1
- movh m1, [srcq+src_strideq+1]
+ movx m1, [srcq+src_strideq+1]
punpckldq m3, m1
-%else
- punpckldq m2, [srcq+src_strideq]
- punpckldq m3, [srcq+src_strideq+1]
-%endif
%endif
pavgb m2, m3
-%if mmsize == 16
+%if %1 > 4
movlhps m0, m2
movhlps m4, m2
-%else ; mmsize == 8
+%else ; 4xh
punpckldq m0, m2
- pshufw m4, m2, 0xe
+ pshuflw m4, m2, 0xe
%endif
- movh m1, [dstq]
+ movx m1, [dstq]
pavgb m0, m2
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
+%if %1 > 4
pavgb m0, [secq]
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+%endif
punpcklbw m3, m5
punpcklbw m1, m5
+%if %1 > 4
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%else ; !avg
- movh m4, [srcq+src_strideq]
- movh m1, [srcq+src_strideq+1]
+ movx m4, [srcq+src_strideq]
+ movx m1, [srcq+src_strideq+1]
pavgb m2, m3
pavgb m4, m1
pavgb m0, m2
pavgb m2, m4
- movh m1, [dstq]
- movh m3, [dstq+dst_strideq]
+ movx m1, [dstq]
+ movx m3, [dstq+dst_strideq]
punpcklbw m0, m5
punpcklbw m2, m5
punpcklbw m3, m5
@@ -644,7 +693,7 @@
%endif
dec block_height
jg .x_half_y_half_loop
- STORE_AND_RET
+ STORE_AND_RET %1
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
@@ -652,7 +701,7 @@
lea bilin_filter, [bilin_filter_m]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+y_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
@@ -727,23 +776,23 @@
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m3, [srcq+1]
+ movx m0, [srcq]
+ movx m3, [srcq+1]
add srcq, src_strideq
pavgb m0, m3
%if notcpuflag(ssse3)
punpcklbw m0, m5
%endif
.x_half_y_other_loop:
- movh m2, [srcq]
- movh m1, [srcq+1]
- movh m4, [srcq+src_strideq]
- movh m3, [srcq+src_strideq+1]
+ movx m2, [srcq]
+ movx m1, [srcq+1]
+ movx m4, [srcq+src_strideq]
+ movx m3, [srcq+src_strideq+1]
pavgb m2, m1
pavgb m4, m3
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
%if cpuflag(ssse3)
- movh m1, [dstq]
+ movx m1, [dstq]
punpcklbw m0, m2
punpcklbw m2, m4
pmaddubsw m0, filter_y_a
@@ -763,16 +812,26 @@
pmullw m1, m4, filter_y_b
paddw m2, filter_rnd
paddw m2, m1
- movh m1, [dstq]
+ movx m1, [dstq]
%endif
psraw m0, 4
psraw m2, 4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
punpcklbw m1, m5
SUM_SSE m0, m1, m2, m3, m6, m7
@@ -789,7 +848,7 @@
%undef filter_y_a
%undef filter_y_b
%undef filter_rnd
- STORE_AND_RET
+ STORE_AND_RET %1
.x_nonhalf:
test y_offsetd, y_offsetd
@@ -800,7 +859,7 @@
lea bilin_filter, [bilin_filter_m]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -868,14 +927,14 @@
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m1, [srcq+1]
- movh m2, [srcq+src_strideq]
- movh m4, [srcq+src_strideq+1]
- movh m3, [dstq+dst_strideq]
+ movx m0, [srcq]
+ movx m1, [srcq+1]
+ movx m2, [srcq+src_strideq]
+ movx m4, [srcq+src_strideq+1]
+ movx m3, [dstq+dst_strideq]
%if cpuflag(ssse3)
punpcklbw m0, m1
- movh m1, [dstq]
+ movx m1, [dstq]
punpcklbw m2, m4
pmaddubsw m0, filter_x_a
pmaddubsw m2, filter_x_a
@@ -895,17 +954,27 @@
pmullw m4, filter_x_b
paddw m0, m1
paddw m2, filter_rnd
- movh m1, [dstq]
+ movx m1, [dstq]
paddw m2, m4
%endif
psraw m0, 4
psraw m2, 4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
punpcklbw m1, m5
SUM_SSE m0, m1, m2, m3, m6, m7
@@ -921,10 +990,10 @@
%undef filter_x_a
%undef filter_x_b
%undef filter_rnd
- STORE_AND_RET
+ STORE_AND_RET %1
.x_nonhalf_y_nonzero:
- cmp y_offsetd, 8
+ cmp y_offsetd, 4
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
@@ -932,7 +1001,7 @@
lea bilin_filter, [bilin_filter_m]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -1040,8 +1109,8 @@
add srcq, src_strideq
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m1, [srcq+1]
+ movx m0, [srcq]
+ movx m1, [srcq+1]
%if cpuflag(ssse3)
punpcklbw m0, m1
pmaddubsw m0, filter_x_a
@@ -1057,17 +1126,17 @@
add srcq, src_strideq
psraw m0, 4
.x_other_y_half_loop:
- movh m2, [srcq]
- movh m1, [srcq+1]
- movh m4, [srcq+src_strideq]
- movh m3, [srcq+src_strideq+1]
+ movx m2, [srcq]
+ movx m1, [srcq+1]
+ movx m4, [srcq+src_strideq]
+ movx m3, [srcq+src_strideq+1]
%if cpuflag(ssse3)
punpcklbw m2, m1
punpcklbw m4, m3
pmaddubsw m2, filter_x_a
pmaddubsw m4, filter_x_a
- movh m1, [dstq]
- movh m3, [dstq+dst_strideq]
+ movx m1, [dstq]
+ movx m3, [dstq+dst_strideq]
paddw m2, filter_rnd
paddw m4, filter_rnd
%else
@@ -1082,9 +1151,9 @@
pmullw m3, filter_x_b
paddw m4, filter_rnd
paddw m2, m1
- movh m1, [dstq]
+ movx m1, [dstq]
paddw m4, m3
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
%endif
psraw m2, 4
psraw m4, 4
@@ -1092,10 +1161,20 @@
pavgw m2, m4
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline - also consider going to bytes here
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
punpcklbw m3, m5
punpcklbw m1, m5
@@ -1113,7 +1192,7 @@
%undef filter_x_a
%undef filter_x_b
%undef filter_rnd
- STORE_AND_RET
+ STORE_AND_RET %1
.x_nonhalf_y_nonhalf:
%ifdef PIC
@@ -1121,7 +1200,7 @@
%endif
shl x_offsetd, filter_idx_shift
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -1264,8 +1343,8 @@
INC_SRC_BY_SRC_STRIDE
add dstq, dst_strideq
%else ; %1 < 16
- movh m0, [srcq]
- movh m1, [srcq+1]
+ movx m0, [srcq]
+ movx m1, [srcq+1]
%if cpuflag(ssse3)
punpcklbw m0, m1
pmaddubsw m0, filter_x_a
@@ -1286,20 +1365,20 @@
INC_SRC_BY_SRC_STRIDE
.x_other_y_other_loop:
- movh m2, [srcq]
- movh m1, [srcq+1]
+ movx m2, [srcq]
+ movx m1, [srcq+1]
INC_SRC_BY_SRC_STRIDE
- movh m4, [srcq]
- movh m3, [srcq+1]
+ movx m4, [srcq]
+ movx m3, [srcq+1]
%if cpuflag(ssse3)
punpcklbw m2, m1
punpcklbw m4, m3
pmaddubsw m2, filter_x_a
pmaddubsw m4, filter_x_a
- movh m3, [dstq+dst_strideq]
- movh m1, [dstq]
+ movx m3, [dstq+dst_strideq]
+ movx m1, [dstq]
paddw m2, filter_rnd
paddw m4, filter_rnd
psraw m2, 4
@@ -1338,9 +1417,9 @@
pmullw m1, m4, filter_y_b
paddw m2, filter_rnd
paddw m0, m3
- movh m3, [dstq+dst_strideq]
+ movx m3, [dstq+dst_strideq]
paddw m2, m1
- movh m1, [dstq]
+ movx m1, [dstq]
psraw m0, 4
psraw m2, 4
punpcklbw m3, m5
@@ -1348,10 +1427,20 @@
%endif
%if %2 == 1 ; avg
; FIXME(rbultje) pipeline
+%if %1 == 4
+ movlhps m0, m2
+%endif
packuswb m0, m2
+%if %1 > 4
pavgb m0, [secq]
punpckhbw m2, m0, m5
punpcklbw m0, m5
+%else
+ movh m2, [secq]
+ pavgb m0, m2
+ punpcklbw m0, m5
+ movhlps m2, m0
+%endif
%endif
SUM_SSE m0, m1, m2, m3, m6, m7
mova m0, m4
@@ -1369,7 +1458,8 @@
%undef filter_y_a
%undef filter_y_b
%undef filter_rnd
- STORE_AND_RET
+%undef movx
+ STORE_AND_RET %1
%endmacro
; FIXME(rbultje) the non-bilinear versions (i.e. x=0,8&&y=0,8) are identical
@@ -1378,26 +1468,22 @@
; location in the sse/2 version, rather than duplicating that code in the
; binary.
-INIT_MMX sse
-SUBPEL_VARIANCE 4
INIT_XMM sse2
+SUBPEL_VARIANCE 4
SUBPEL_VARIANCE 8
SUBPEL_VARIANCE 16
-INIT_MMX ssse3
-SUBPEL_VARIANCE 4
INIT_XMM ssse3
+SUBPEL_VARIANCE 4
SUBPEL_VARIANCE 8
SUBPEL_VARIANCE 16
-INIT_MMX sse
-SUBPEL_VARIANCE 4, 1
INIT_XMM sse2
+SUBPEL_VARIANCE 4, 1
SUBPEL_VARIANCE 8, 1
SUBPEL_VARIANCE 16, 1
-INIT_MMX ssse3
-SUBPEL_VARIANCE 4, 1
INIT_XMM ssse3
+SUBPEL_VARIANCE 4, 1
SUBPEL_VARIANCE 8, 1
SUBPEL_VARIANCE 16, 1
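
For reference, the contract these sse2/ssse3 entry points (now including the 4x8 and 4x4 sizes) have to honour is the two-pass bilinear filter plus variance of the C fallback. A compact 4x4 model of it, assuming the usual eighth-pel offsets and bilinear table from aom_dsp/variance.c (a sketch for reading the asm, not the shipped code):

    #include <stdint.h>

    #define FILTER_BITS 7

    /* Usual 8-entry bilinear table indexed by an eighth-pel offset. */
    static const uint8_t bilinear_filters[8][2] = {
      { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
      { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
    };

    static uint32_t sub_pixel_variance4x4_model(
        const uint8_t *src, int src_stride, int xoffset, int yoffset,
        const uint8_t *ref, int ref_stride, uint32_t *sse) {
      uint16_t hbuf[5 * 4]; /* horizontal pass keeps one extra row */
      uint8_t pred[4 * 4];
      int64_t sum = 0;
      uint32_t sse32 = 0;

      /* First pass: horizontal bilinear filter, 4 wide, 5 rows. */
      for (int i = 0; i < 5; ++i, src += src_stride)
        for (int j = 0; j < 4; ++j)
          hbuf[i * 4 + j] =
              (uint16_t)((src[j] * bilinear_filters[xoffset][0] +
                          src[j + 1] * bilinear_filters[xoffset][1] +
                          (1 << (FILTER_BITS - 1))) >> FILTER_BITS);

      /* Second pass: vertical bilinear filter. */
      for (int i = 0; i < 4; ++i)
        for (int j = 0; j < 4; ++j)
          pred[i * 4 + j] =
              (uint8_t)((hbuf[i * 4 + j] * bilinear_filters[yoffset][0] +
                         hbuf[(i + 1) * 4 + j] * bilinear_filters[yoffset][1] +
                         (1 << (FILTER_BITS - 1))) >> FILTER_BITS);

      /* Variance of the filtered block against the reference; the asm returns
       * the signed sum and stores the SSE, and the C wrapper combines them
       * like the last line here. */
      for (int i = 0; i < 4; ++i)
        for (int j = 0; j < 4; ++j) {
          const int diff = pred[i * 4 + j] - ref[i * ref_stride + j];
          sum += diff;
          sse32 += (uint32_t)(diff * diff);
        }
      *sse = sse32;
      return sse32 - (uint32_t)(((int64_t)sum * sum) / (4 * 4));
    }
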
diff --git a/aom_dsp/x86/variance_sse2.c b/aom_dsp/x86/variance_sse2.c
index 949d585..75e9719 100644
--- a/aom_dsp/x86/variance_sse2.c
+++ b/aom_dsp/x86/variance_sse2.c
@@ -335,7 +335,6 @@
return *sse;
}
-#if CONFIG_USE_X86INC
// The 2 unused parameters are place holders for PIC enabled build.
// These definitions are for functions defined in subpel_variance.asm
#define DECL(w, opt) \
@@ -344,11 +343,11 @@
const uint8_t *dst, ptrdiff_t dst_stride, int height, unsigned int *sse, \
void *unused0, void *unused)
#define DECLS(opt1, opt2) \
- DECL(4, opt2); \
+ DECL(4, opt1); \
DECL(8, opt1); \
DECL(16, opt1)
-DECLS(sse2, sse);
+DECLS(sse2, sse2);
DECLS(ssse3, ssse3);
#undef DECLS
#undef DECL
@@ -397,10 +396,10 @@
FN(8, 16, 8, 3, 4, opt1, (int32_t), (int32_t)); \
FN(8, 8, 8, 3, 3, opt1, (int32_t), (int32_t)); \
FN(8, 4, 8, 3, 2, opt1, (int32_t), (int32_t)); \
- FN(4, 8, 4, 2, 3, opt2, (int32_t), (int32_t)); \
- FN(4, 4, 4, 2, 2, opt2, (int32_t), (int32_t))
+ FN(4, 8, 4, 2, 3, opt1, (int32_t), (int32_t)); \
+ FN(4, 4, 4, 2, 2, opt1, (int32_t), (int32_t))
-FNS(sse2, sse);
+FNS(sse2, sse2);
FNS(ssse3, ssse3);
#undef FNS
@@ -414,11 +413,11 @@
ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0, \
void *unused)
#define DECLS(opt1, opt2) \
- DECL(4, opt2); \
+ DECL(4, opt1); \
DECL(8, opt1); \
DECL(16, opt1)
-DECLS(sse2, sse);
+DECLS(sse2, sse2);
DECLS(ssse3, ssse3);
#undef DECL
#undef DECLS
@@ -468,15 +467,14 @@
FN(8, 16, 8, 3, 4, opt1, (uint32_t), (int32_t)); \
FN(8, 8, 8, 3, 3, opt1, (uint32_t), (int32_t)); \
FN(8, 4, 8, 3, 2, opt1, (uint32_t), (int32_t)); \
- FN(4, 8, 4, 2, 3, opt2, (uint32_t), (int32_t)); \
- FN(4, 4, 4, 2, 2, opt2, (uint32_t), (int32_t))
+ FN(4, 8, 4, 2, 3, opt1, (uint32_t), (int32_t)); \
+ FN(4, 4, 4, 2, 2, opt1, (uint32_t), (int32_t))
FNS(sse2, sse);
FNS(ssse3, ssse3);
#undef FNS
#undef FN
-#endif // CONFIG_USE_X86INC
void aom_upsampled_pred_sse2(uint8_t *pred, int width, int height,
const uint8_t *ref, const int ref_stride) {
@@ -683,7 +681,6 @@
p0 = _mm_packus_epi16(p0, zero);
*(int *)comp_pred = _mm_cvtsi128_si32(p0);
-
comp_pred += 4;
pred += 4;
ref += 4 * 8;
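
Switching DECLS/FNS from opt2 to opt1 for the 4-wide sizes means the generated wrappers now call the 4xh helpers from the xmm build of subpel_variance_sse2.asm rather than the old MMX "sse" ones. Hand-expanding one case gives roughly the following (a sketch of the macro output under that reading; the helper's prototype follows the DECL lines above):

    #include <stddef.h>
    #include <stdint.h>

    /* Helper defined in subpel_variance_sse2.asm; prototype per DECL(4, sse2). */
    int aom_sub_pixel_variance4xh_sse2(const uint8_t *src, ptrdiff_t src_stride,
                                       int x_offset, int y_offset,
                                       const uint8_t *dst, ptrdiff_t dst_stride,
                                       int height, unsigned int *sse,
                                       void *unused0, void *unused);

    /* Approximate expansion of FN(4, 4, 4, 2, 2, sse2, (int32_t), (int32_t)):
     * one helper call covers the whole 4-wide block, then the variance is
     * formed from the returned sum and the stored SSE. */
    unsigned int aom_sub_pixel_variance4x4_sse2(const uint8_t *src,
                                                int src_stride, int x_offset,
                                                int y_offset, const uint8_t *dst,
                                                int dst_stride,
                                                unsigned int *sse_ptr) {
      unsigned int sse;
      const int se = aom_sub_pixel_variance4xh_sse2(src, src_stride, x_offset,
                                                    y_offset, dst, dst_stride, 4,
                                                    &sse, NULL, NULL);
      *sse_ptr = sse;
      return sse - (unsigned int)(((int32_t)((int32_t)se * se)) >> (2 + 2));
    }
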
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 176643a..ddc2422 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -677,18 +677,8 @@
//------------------------------------------------------------------------------
// x86 functions
-#if HAVE_SSE
-#if CONFIG_USE_X86INC
-const SadMxNx4Param x4d_sse_tests[] = {
- make_tuple(4, 8, &aom_sad4x8x4d_sse, -1),
- make_tuple(4, 4, &aom_sad4x4x4d_sse, -1),
-};
-INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::ValuesIn(x4d_sse_tests));
-#endif // CONFIG_USE_X86INC
-#endif // HAVE_SSE
#if HAVE_SSE2
-#if CONFIG_USE_X86INC
const SadMxNParam sse2_tests[] = {
make_tuple(64, 64, &aom_sad64x64_sse2, -1),
make_tuple(64, 32, &aom_sad64x32_sse2, -1),
@@ -805,6 +795,8 @@
make_tuple(8, 16, &aom_sad8x16x4d_sse2, -1),
make_tuple(8, 8, &aom_sad8x8x4d_sse2, -1),
make_tuple(8, 4, &aom_sad8x4x4d_sse2, -1),
+ make_tuple(4, 8, &aom_sad4x8x4d_sse2, -1),
+ make_tuple(4, 4, &aom_sad4x4x4d_sse2, -1),
#if CONFIG_AOM_HIGHBITDEPTH
make_tuple(64, 64, &aom_highbd_sad64x64x4d_sse2, 8),
make_tuple(64, 32, &aom_highbd_sad64x32x4d_sse2, 8),
@@ -848,7 +840,6 @@
#endif // CONFIG_AOM_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
-#endif // CONFIG_USE_X86INC
#endif // HAVE_SSE2
#if HAVE_SSE3
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 844a1d0..081c85a 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -987,8 +987,8 @@
make_tuple(3, 4, &aom_sub_pixel_variance8x16_sse2, 0),
make_tuple(3, 3, &aom_sub_pixel_variance8x8_sse2, 0),
make_tuple(3, 2, &aom_sub_pixel_variance8x4_sse2, 0),
- make_tuple(2, 3, &aom_sub_pixel_variance4x8_sse, 0),
- make_tuple(2, 2, &aom_sub_pixel_variance4x4_sse, 0)));
+ make_tuple(2, 3, &aom_sub_pixel_variance4x8_sse2, 0),
+ make_tuple(2, 2, &aom_sub_pixel_variance4x4_sse2, 0)));
INSTANTIATE_TEST_CASE_P(
SSE2, AvxSubpelAvgVarianceTest,
@@ -1004,8 +1004,8 @@
make_tuple(3, 4, &aom_sub_pixel_avg_variance8x16_sse2, 0),
make_tuple(3, 3, &aom_sub_pixel_avg_variance8x8_sse2, 0),
make_tuple(3, 2, &aom_sub_pixel_avg_variance8x4_sse2, 0),
- make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_sse, 0),
- make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_sse, 0)));
+ make_tuple(2, 3, &aom_sub_pixel_avg_variance4x8_sse2, 0),
+ make_tuple(2, 2, &aom_sub_pixel_avg_variance4x4_sse2, 0)));
#endif // CONFIG_USE_X86INC
#if CONFIG_AOM_HIGHBITDEPTH