Remove av1_full_sad_search

This code is only used in libvpx's vp8 encoder; therefore it is
unlikely to be used for encoding av1.

BUG=aomedia:1726

Change-Id: Iec2b29d7c44c2b1bb504e25f3a74fb9e7c233662
diff --git a/aom_dsp/aom_dsp.cmake b/aom_dsp/aom_dsp.cmake index a182e32..38c3324 100644 --- a/aom_dsp/aom_dsp.cmake +++ b/aom_dsp/aom_dsp.cmake
@@ -242,17 +242,10 @@ set(AOM_DSP_ENCODER_INTRIN_SSE2 "${AOM_ROOT}/aom_dsp/x86/quantize_sse2.c") - set(AOM_DSP_ENCODER_ASM_SSSE3 - "${AOM_ROOT}/aom_dsp/x86/sad_ssse3.asm") - set(AOM_DSP_ENCODER_ASM_SSSE3_X86_64 "${AOM_ROOT}/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm" "${AOM_ROOT}/aom_dsp/x86/ssim_opt_x86_64.asm") - set(AOM_DSP_ENCODER_ASM_SSE3 "${AOM_ROOT}/aom_dsp/x86/sad_sse3.asm") - - set(AOM_DSP_ENCODER_ASM_SSE4_1 "${AOM_ROOT}/aom_dsp/x86/sad_sse4.asm") - set(AOM_DSP_ENCODER_INTRIN_AVX2 "${AOM_ROOT}/aom_dsp/x86/fwd_dct32x32_impl_avx2.h" "${AOM_ROOT}/aom_dsp/x86/fwd_txfm_avx2.c"
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index 702bb6c..9d168a3 100755 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -779,60 +779,6 @@ # - # Multi-block SAD, comparing a reference to N blocks 1 pixel apart horizontally - # - # Blocks of 3 - foreach $s (@block_widths) { - add_proto qw/void/, "aom_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - } - specialize qw/aom_sad64x64x3 msa/; - specialize qw/aom_sad32x32x3 msa/; - specialize qw/aom_sad16x16x3 sse3 ssse3 msa/; - specialize qw/aom_sad8x8x3 sse3 msa/; - specialize qw/aom_sad4x4x3 sse3 msa/; - - add_proto qw/void/, "aom_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - specialize qw/aom_sad16x8x3 sse3 ssse3 msa/; - add_proto qw/void/, "aom_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - specialize qw/aom_sad8x16x3 sse3 msa/; - - # Blocks of 8 - foreach $s (@block_widths) { - add_proto qw/void/, "aom_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - } - specialize qw/aom_sad64x64x8 msa/; - specialize qw/aom_sad32x32x8 msa/; - specialize qw/aom_sad16x16x8 sse4_1 msa/; - specialize qw/aom_sad8x8x8 sse4_1 msa/; - specialize qw/aom_sad4x4x8 sse4_1 msa/; - - add_proto qw/void/, "aom_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - specialize qw/aom_sad16x8x8 sse4_1 msa/; - add_proto qw/void/, "aom_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - specialize qw/aom_sad8x16x8 sse4_1 msa/; - add_proto qw/void/, "aom_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - specialize qw/aom_sad8x4x8 msa/; - add_proto qw/void/, "aom_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - specialize 
qw/aom_sad4x8x8 msa/; - - - foreach $s (@block_widths) { - # Blocks of 3 - add_proto qw/void/, "aom_highbd_sad${s}x${s}x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - # Blocks of 8 - add_proto qw/void/, "aom_highbd_sad${s}x${s}x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - } - # Blocks of 3 - add_proto qw/void/, "aom_highbd_sad16x8x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - add_proto qw/void/, "aom_highbd_sad8x16x3", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - # Blocks of 8 - add_proto qw/void/, "aom_highbd_sad16x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - add_proto qw/void/, "aom_highbd_sad8x16x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - add_proto qw/void/, "aom_highbd_sad8x4x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - add_proto qw/void/, "aom_highbd_sad4x8x8", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array"; - - - # # Multi-block SAD, comparing a reference to N independent blocks # foreach (@block_sizes) {
diff --git a/aom_dsp/mips/sad_msa.c b/aom_dsp/mips/sad_msa.c index 258eb5c..f8e85fd 100644 --- a/aom_dsp/mips/sad_msa.c +++ b/aom_dsp/mips/sad_msa.c
@@ -160,640 +160,6 @@ return sad; } -static void sad_4width_x3_msa(const uint8_t *src_ptr, int32_t src_stride, - const uint8_t *ref_ptr, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - uint32_t src0, src1, src2, src3; - v16u8 src = { 0 }; - v16u8 ref = { 0 }; - v16u8 ref0, ref1, ref2, ref3, diff; - v8u16 sad0 = { 0 }; - v8u16 sad1 = { 0 }; - v8u16 sad2 = { 0 }; - - for (ht_cnt = (height >> 2); ht_cnt--;) { - LW4(src_ptr, src_stride, src0, src1, src2, src3); - src_ptr += (4 * src_stride); - INSERT_W4_UB(src0, src1, src2, src3, src); - - LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); - ref_ptr += (4 * ref_stride); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad0 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad1 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad2 += __msa_hadd_u_h(diff, diff); - } - - sad_array[0] = HADD_UH_U32(sad0); - sad_array[1] = HADD_UH_U32(sad1); - sad_array[2] = HADD_UH_U32(sad2); -} - -static void sad_8width_x3_msa(const uint8_t *src, int32_t src_stride, - const uint8_t *ref, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - v16u8 src0, src1, src2, src3; - v16u8 ref0, ref1, ref00, ref11, ref22, ref33; - v8u16 sad0 = { 0 }; - v8u16 sad1 = { 0 }; - v8u16 sad2 = { 0 }; - - for (ht_cnt = (height >> 2); ht_cnt--;) { - LD_UB4(src, src_stride, src0, src1, src2, src3); - src += (4 * src_stride); - LD_UB4(ref, ref_stride, ref00, ref11, ref22, ref33); - ref += (4 * ref_stride); - PCKEV_D4_UB(src1, src0, src3, src2, ref11, ref00, ref33, ref22, src0, src1, - ref0, ref1); - sad0 += 
SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad1 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); - } - - sad_array[0] = HADD_UH_U32(sad0); - sad_array[1] = HADD_UH_U32(sad1); - sad_array[2] = HADD_UH_U32(sad2); -} - -static void sad_16width_x3_msa(const uint8_t *src_ptr, int32_t src_stride, - const uint8_t *ref_ptr, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - v16u8 src, ref, ref0, ref1, diff; - v8u16 sad0 = { 0 }; - v8u16 sad1 = { 0 }; - v8u16 sad2 = { 0 }; - - for (ht_cnt = (height >> 1); ht_cnt--;) { - src = LD_UB(src_ptr); - src_ptr += src_stride; - LD_UB2(ref_ptr, 16, ref0, ref1); - ref_ptr += ref_stride; - - diff = __msa_asub_u_b(src, ref0); - sad0 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 1); - diff = __msa_asub_u_b(src, ref); - sad1 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 2); - diff = __msa_asub_u_b(src, ref); - sad2 += __msa_hadd_u_h(diff, diff); - - src = LD_UB(src_ptr); - src_ptr += src_stride; - LD_UB2(ref_ptr, 16, ref0, ref1); - ref_ptr += ref_stride; - - diff = __msa_asub_u_b(src, ref0); - sad0 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 1); - diff = __msa_asub_u_b(src, ref); - sad1 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 2); - diff = __msa_asub_u_b(src, ref); - sad2 += __msa_hadd_u_h(diff, diff); - } - - sad_array[0] = HADD_UH_U32(sad0); - sad_array[1] = HADD_UH_U32(sad1); - sad_array[2] = HADD_UH_U32(sad2); -} - -static void sad_32width_x3_msa(const uint8_t 
*src, int32_t src_stride, - const uint8_t *ref, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - v16u8 src0, src1, ref0_0, ref0_1, ref0_2, ref0, ref1; - v8u16 sad0 = { 0 }; - v8u16 sad1 = { 0 }; - v8u16 sad2 = { 0 }; - - for (ht_cnt = height >> 1; ht_cnt--;) { - LD_UB2(src, 16, src0, src1); - src += src_stride; - LD_UB3(ref, 16, ref0_0, ref0_1, ref0_2); - ref += ref_stride; - - sad0 += SAD_UB2_UH(src0, src1, ref0_0, ref0_1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 1); - sad1 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 2); - sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); - - LD_UB2(src, 16, src0, src1); - src += src_stride; - LD_UB3(ref, 16, ref0_0, ref0_1, ref0_2); - ref += ref_stride; - - sad0 += SAD_UB2_UH(src0, src1, ref0_0, ref0_1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 1); - sad1 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 2); - sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); - } - - sad_array[0] = HADD_UH_U32(sad0); - sad_array[1] = HADD_UH_U32(sad1); - sad_array[2] = HADD_UH_U32(sad2); -} - -static void sad_64width_x3_msa(const uint8_t *src, int32_t src_stride, - const uint8_t *ref, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - v16u8 src0, src1, src2, src3; - v16u8 ref0_0, ref0_1, ref0_2, ref0_3, ref0_4, ref0, ref1, ref2, ref3; - v8u16 sad0_0 = { 0 }; - v8u16 sad0_1 = { 0 }; - v8u16 sad1_0 = { 0 }; - v8u16 sad1_1 = { 0 }; - v8u16 sad2_0 = { 0 }; - v8u16 sad2_1 = { 0 }; - v4u32 sad; - - for (ht_cnt = height; ht_cnt--;) { - LD_UB4(src, 16, src0, src1, src2, src3); - src += src_stride; - LD_UB4(ref, 16, ref0_0, ref0_1, ref0_2, ref0_3); - ref0_4 = LD_UB(ref + 64); - ref += ref_stride; - - sad0_0 += SAD_UB2_UH(src0, src1, ref0_0, ref0_1); - sad0_1 += SAD_UB2_UH(src2, src3, ref0_2, ref0_3); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 1); - 
SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, ref3, 1); - sad1_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad1_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 2); - SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, ref3, 2); - sad2_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad2_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - } - - sad = __msa_hadd_u_w(sad0_0, sad0_0); - sad += __msa_hadd_u_w(sad0_1, sad0_1); - sad_array[0] = HADD_SW_S32((v4i32)sad); - - sad = __msa_hadd_u_w(sad1_0, sad1_0); - sad += __msa_hadd_u_w(sad1_1, sad1_1); - sad_array[1] = HADD_SW_S32((v4i32)sad); - - sad = __msa_hadd_u_w(sad2_0, sad2_0); - sad += __msa_hadd_u_w(sad2_1, sad2_1); - sad_array[2] = HADD_SW_S32((v4i32)sad); -} - -static void sad_4width_x8_msa(const uint8_t *src_ptr, int32_t src_stride, - const uint8_t *ref_ptr, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - uint32_t src0, src1, src2, src3; - v16u8 ref0, ref1, ref2, ref3, diff; - v16u8 src = { 0 }; - v16u8 ref = { 0 }; - v8u16 sad0 = { 0 }; - v8u16 sad1 = { 0 }; - v8u16 sad2 = { 0 }; - v8u16 sad3 = { 0 }; - v8u16 sad4 = { 0 }; - v8u16 sad5 = { 0 }; - v8u16 sad6 = { 0 }; - v8u16 sad7 = { 0 }; - - for (ht_cnt = (height >> 2); ht_cnt--;) { - LW4(src_ptr, src_stride, src0, src1, src2, src3); - INSERT_W4_UB(src0, src1, src2, src3, src); - src_ptr += (4 * src_stride); - LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); - ref_ptr += (4 * ref_stride); - - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad0 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad1 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - 
diff = __msa_asub_u_b(src, ref); - sad2 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad3 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad4 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad5 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad6 += __msa_hadd_u_h(diff, diff); - - SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); - SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); - SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); - diff = __msa_asub_u_b(src, ref); - sad7 += __msa_hadd_u_h(diff, diff); - } - - sad_array[0] = HADD_UH_U32(sad0); - sad_array[1] = HADD_UH_U32(sad1); - sad_array[2] = HADD_UH_U32(sad2); - sad_array[3] = HADD_UH_U32(sad3); - sad_array[4] = HADD_UH_U32(sad4); - sad_array[5] = HADD_UH_U32(sad5); - sad_array[6] = HADD_UH_U32(sad6); - sad_array[7] = HADD_UH_U32(sad7); -} - -static void sad_8width_x8_msa(const uint8_t *src, int32_t src_stride, - const uint8_t *ref, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - v16u8 src0, src1, src2, src3; - v16u8 ref0, ref1, ref00, ref11, ref22, ref33; - v8u16 sad0 = { 0 }; - v8u16 sad1 = { 0 }; - v8u16 sad2 = { 0 }; - v8u16 sad3 = { 0 }; - v8u16 sad4 = { 0 }; - v8u16 sad5 = { 0 }; - v8u16 sad6 = { 0 }; - v8u16 sad7 = { 0 }; - - for (ht_cnt = (height >> 2); ht_cnt--;) { - 
LD_UB4(src, src_stride, src0, src1, src2, src3); - src += (4 * src_stride); - LD_UB4(ref, ref_stride, ref00, ref11, ref22, ref33); - ref += (4 * ref_stride); - PCKEV_D4_UB(src1, src0, src3, src2, ref11, ref00, ref33, ref22, src0, src1, - ref0, ref1); - sad0 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad1 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad3 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad4 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad5 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad6 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); - SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); - PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); - sad7 += SAD_UB2_UH(src0, src1, ref0, ref1); - } - - sad_array[0] = HADD_UH_U32(sad0); - sad_array[1] = HADD_UH_U32(sad1); - sad_array[2] = HADD_UH_U32(sad2); - sad_array[3] = HADD_UH_U32(sad3); - 
sad_array[4] = HADD_UH_U32(sad4); - sad_array[5] = HADD_UH_U32(sad5); - sad_array[6] = HADD_UH_U32(sad6); - sad_array[7] = HADD_UH_U32(sad7); -} - -static void sad_16width_x8_msa(const uint8_t *src_ptr, int32_t src_stride, - const uint8_t *ref_ptr, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - v16u8 src, ref0, ref1, ref; - v16u8 diff; - v8u16 sad0 = { 0 }; - v8u16 sad1 = { 0 }; - v8u16 sad2 = { 0 }; - v8u16 sad3 = { 0 }; - v8u16 sad4 = { 0 }; - v8u16 sad5 = { 0 }; - v8u16 sad6 = { 0 }; - v8u16 sad7 = { 0 }; - - for (ht_cnt = (height >> 1); ht_cnt--;) { - src = LD_UB(src_ptr); - src_ptr += src_stride; - LD_UB2(ref_ptr, 16, ref0, ref1); - ref_ptr += ref_stride; - - diff = __msa_asub_u_b(src, ref0); - sad0 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 1); - diff = __msa_asub_u_b(src, ref); - sad1 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 2); - diff = __msa_asub_u_b(src, ref); - sad2 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 3); - diff = __msa_asub_u_b(src, ref); - sad3 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 4); - diff = __msa_asub_u_b(src, ref); - sad4 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 5); - diff = __msa_asub_u_b(src, ref); - sad5 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 6); - diff = __msa_asub_u_b(src, ref); - sad6 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 7); - diff = __msa_asub_u_b(src, ref); - sad7 += __msa_hadd_u_h(diff, diff); - - src = LD_UB(src_ptr); - src_ptr += src_stride; - LD_UB2(ref_ptr, 16, ref0, ref1); - ref_ptr += ref_stride; - - diff = __msa_asub_u_b(src, ref0); - sad0 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 1); - diff = __msa_asub_u_b(src, ref); - sad1 
+= __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 2); - diff = __msa_asub_u_b(src, ref); - sad2 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 3); - diff = __msa_asub_u_b(src, ref); - sad3 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 4); - diff = __msa_asub_u_b(src, ref); - sad4 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 5); - diff = __msa_asub_u_b(src, ref); - sad5 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 6); - diff = __msa_asub_u_b(src, ref); - sad6 += __msa_hadd_u_h(diff, diff); - - ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 7); - diff = __msa_asub_u_b(src, ref); - sad7 += __msa_hadd_u_h(diff, diff); - } - - sad_array[0] = HADD_UH_U32(sad0); - sad_array[1] = HADD_UH_U32(sad1); - sad_array[2] = HADD_UH_U32(sad2); - sad_array[3] = HADD_UH_U32(sad3); - sad_array[4] = HADD_UH_U32(sad4); - sad_array[5] = HADD_UH_U32(sad5); - sad_array[6] = HADD_UH_U32(sad6); - sad_array[7] = HADD_UH_U32(sad7); -} - -static void sad_32width_x8_msa(const uint8_t *src, int32_t src_stride, - const uint8_t *ref, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - int32_t ht_cnt; - v16u8 src0, src1; - v16u8 ref0, ref1, ref0_0, ref0_1, ref0_2; - v8u16 sad0 = { 0 }; - v8u16 sad1 = { 0 }; - v8u16 sad2 = { 0 }; - v8u16 sad3 = { 0 }; - v8u16 sad4 = { 0 }; - v8u16 sad5 = { 0 }; - v8u16 sad6 = { 0 }; - v8u16 sad7 = { 0 }; - - for (ht_cnt = height; ht_cnt--;) { - LD_UB2(src, 16, src0, src1); - src += src_stride; - LD_UB3(ref, 16, ref0_0, ref0_1, ref0_2); - ref += ref_stride; - - sad0 += SAD_UB2_UH(src0, src1, ref0_0, ref0_1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 1); - sad1 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 2); - sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref0_1, ref0_2, 
ref0_0, ref0_1, ref0, ref1, 3); - sad3 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 4); - sad4 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 5); - sad5 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 6); - sad6 += SAD_UB2_UH(src0, src1, ref0, ref1); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 7); - sad7 += SAD_UB2_UH(src0, src1, ref0, ref1); - } - - sad_array[0] = HADD_UH_U32(sad0); - sad_array[1] = HADD_UH_U32(sad1); - sad_array[2] = HADD_UH_U32(sad2); - sad_array[3] = HADD_UH_U32(sad3); - sad_array[4] = HADD_UH_U32(sad4); - sad_array[5] = HADD_UH_U32(sad5); - sad_array[6] = HADD_UH_U32(sad6); - sad_array[7] = HADD_UH_U32(sad7); -} - -static void sad_64width_x8_msa(const uint8_t *src, int32_t src_stride, - const uint8_t *ref, int32_t ref_stride, - int32_t height, uint32_t *sad_array) { - const uint8_t *src_dup, *ref_dup; - int32_t ht_cnt; - v16u8 src0, src1, src2, src3; - v16u8 ref0_0, ref0_1, ref0_2, ref0_3, ref0_4; - v16u8 ref0, ref1, ref2, ref3; - v8u16 sad0_0 = { 0 }; - v8u16 sad0_1 = { 0 }; - v8u16 sad1_0 = { 0 }; - v8u16 sad1_1 = { 0 }; - v8u16 sad2_0 = { 0 }; - v8u16 sad2_1 = { 0 }; - v8u16 sad3_0 = { 0 }; - v8u16 sad3_1 = { 0 }; - v4u32 sad; - - src_dup = src; - ref_dup = ref; - - for (ht_cnt = height; ht_cnt--;) { - LD_UB4(src, 16, src0, src1, src2, src3); - src += src_stride; - LD_UB5(ref, 16, ref0_0, ref0_1, ref0_2, ref0_3, ref0_4); - ref += ref_stride; - - sad0_0 += SAD_UB2_UH(src0, src1, ref0_0, ref0_1); - sad0_1 += SAD_UB2_UH(src2, src3, ref0_2, ref0_3); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 1); - SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, ref3, 1); - sad1_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad1_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 2); - SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, 
ref3, 2); - sad2_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad2_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 3); - SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, ref3, 3); - sad3_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad3_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - } - - sad = __msa_hadd_u_w(sad0_0, sad0_0); - sad += __msa_hadd_u_w(sad0_1, sad0_1); - sad_array[0] = HADD_SW_S32(sad); - - sad = __msa_hadd_u_w(sad1_0, sad1_0); - sad += __msa_hadd_u_w(sad1_1, sad1_1); - sad_array[1] = HADD_SW_S32(sad); - - sad = __msa_hadd_u_w(sad2_0, sad2_0); - sad += __msa_hadd_u_w(sad2_1, sad2_1); - sad_array[2] = HADD_SW_S32(sad); - - sad = __msa_hadd_u_w(sad3_0, sad3_0); - sad += __msa_hadd_u_w(sad3_1, sad3_1); - sad_array[3] = HADD_SW_S32(sad); - - sad0_0 = (v8u16)__msa_ldi_h(0); - sad0_1 = (v8u16)__msa_ldi_h(0); - sad1_0 = (v8u16)__msa_ldi_h(0); - sad1_1 = (v8u16)__msa_ldi_h(0); - sad2_0 = (v8u16)__msa_ldi_h(0); - sad2_1 = (v8u16)__msa_ldi_h(0); - sad3_0 = (v8u16)__msa_ldi_h(0); - sad3_1 = (v8u16)__msa_ldi_h(0); - - for (ht_cnt = 64; ht_cnt--;) { - LD_UB4(src_dup, 16, src0, src1, src2, src3); - src_dup += src_stride; - LD_UB5(ref_dup, 16, ref0_0, ref0_1, ref0_2, ref0_3, ref0_4); - ref_dup += ref_stride; - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 4); - SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, ref3, 4); - sad0_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad0_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 5); - SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, ref3, 5); - sad1_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad1_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 6); - SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, ref3, 6); - sad2_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad2_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - - SLDI_B2_UB(ref0_1, ref0_2, ref0_0, ref0_1, ref0, ref1, 7); - 
SLDI_B2_UB(ref0_3, ref0_4, ref0_2, ref0_3, ref2, ref3, 7); - sad3_0 += SAD_UB2_UH(src0, src1, ref0, ref1); - sad3_1 += SAD_UB2_UH(src2, src3, ref2, ref3); - } - - sad = __msa_hadd_u_w(sad0_0, sad0_0); - sad += __msa_hadd_u_w(sad0_1, sad0_1); - sad_array[4] = HADD_SW_S32(sad); - - sad = __msa_hadd_u_w(sad1_0, sad1_0); - sad += __msa_hadd_u_w(sad1_1, sad1_1); - sad_array[5] = HADD_SW_S32(sad); - - sad = __msa_hadd_u_w(sad2_0, sad2_0); - sad += __msa_hadd_u_w(sad2_1, sad2_1); - sad_array[6] = HADD_SW_S32(sad); - - sad = __msa_hadd_u_w(sad3_0, sad3_0); - sad += __msa_hadd_u_w(sad3_1, sad3_1); - sad_array[7] = HADD_SW_S32(sad); -} - static void sad_4width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *const aref_ptr[], int32_t ref_stride, int32_t height, @@ -1290,76 +656,6 @@ return sad_64width_msa(src, src_stride, ref, ref_stride, height); \ } -#define AOM_SAD_4xHEIGHTx3_MSA(height) \ - void aom_sad4x##height##x3_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_4width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_8xHEIGHTx3_MSA(height) \ - void aom_sad8x##height##x3_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_8width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_16xHEIGHTx3_MSA(height) \ - void aom_sad16x##height##x3_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_16width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_32xHEIGHTx3_MSA(height) \ - void aom_sad32x##height##x3_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_32width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_64xHEIGHTx3_MSA(height) \ - void 
aom_sad64x##height##x3_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_64width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_4xHEIGHTx8_MSA(height) \ - void aom_sad4x##height##x8_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_4width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_8xHEIGHTx8_MSA(height) \ - void aom_sad8x##height##x8_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_8width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_16xHEIGHTx8_MSA(height) \ - void aom_sad16x##height##x8_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_16width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_32xHEIGHTx8_MSA(height) \ - void aom_sad32x##height##x8_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_32width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - -#define AOM_SAD_64xHEIGHTx8_MSA(height) \ - void aom_sad64x##height##x8_msa(const uint8_t *src, int32_t src_stride, \ - const uint8_t *ref, int32_t ref_stride, \ - uint32_t *sads) { \ - sad_64width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \ - } - #define AOM_SAD_4xHEIGHTx4D_MSA(height) \ void aom_sad4x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \ const uint8_t *const refs[], \ @@ -1438,92 +734,66 @@ /* clang-format off */ // 64x64 AOM_SAD_64xHEIGHT_MSA(64) -AOM_SAD_64xHEIGHTx3_MSA(64) -AOM_SAD_64xHEIGHTx8_MSA(64) AOM_SAD_64xHEIGHTx4D_MSA(64) AOM_AVGSAD_64xHEIGHT_MSA(64) // 64x32 AOM_SAD_64xHEIGHT_MSA(32) -AOM_SAD_64xHEIGHTx3_MSA(32) -AOM_SAD_64xHEIGHTx8_MSA(32) AOM_SAD_64xHEIGHTx4D_MSA(32) 
AOM_AVGSAD_64xHEIGHT_MSA(32) // 32x64 AOM_SAD_32xHEIGHT_MSA(64) -AOM_SAD_32xHEIGHTx3_MSA(64) -AOM_SAD_32xHEIGHTx8_MSA(64) AOM_SAD_32xHEIGHTx4D_MSA(64) AOM_AVGSAD_32xHEIGHT_MSA(64) // 32x32 AOM_SAD_32xHEIGHT_MSA(32) -AOM_SAD_32xHEIGHTx3_MSA(32) -AOM_SAD_32xHEIGHTx8_MSA(32) AOM_SAD_32xHEIGHTx4D_MSA(32) AOM_AVGSAD_32xHEIGHT_MSA(32) // 32x16 AOM_SAD_32xHEIGHT_MSA(16) -AOM_SAD_32xHEIGHTx3_MSA(16) -AOM_SAD_32xHEIGHTx8_MSA(16) AOM_SAD_32xHEIGHTx4D_MSA(16) AOM_AVGSAD_32xHEIGHT_MSA(16) // 16x32 AOM_SAD_16xHEIGHT_MSA(32) -AOM_SAD_16xHEIGHTx3_MSA(32) -AOM_SAD_16xHEIGHTx8_MSA(32) AOM_SAD_16xHEIGHTx4D_MSA(32) AOM_AVGSAD_16xHEIGHT_MSA(32) // 16x16 AOM_SAD_16xHEIGHT_MSA(16) -AOM_SAD_16xHEIGHTx3_MSA(16) -AOM_SAD_16xHEIGHTx8_MSA(16) AOM_SAD_16xHEIGHTx4D_MSA(16) AOM_AVGSAD_16xHEIGHT_MSA(16) // 16x8 AOM_SAD_16xHEIGHT_MSA(8) -AOM_SAD_16xHEIGHTx3_MSA(8) -AOM_SAD_16xHEIGHTx8_MSA(8) AOM_SAD_16xHEIGHTx4D_MSA(8) AOM_AVGSAD_16xHEIGHT_MSA(8) // 8x16 AOM_SAD_8xHEIGHT_MSA(16) -AOM_SAD_8xHEIGHTx3_MSA(16) -AOM_SAD_8xHEIGHTx8_MSA(16) AOM_SAD_8xHEIGHTx4D_MSA(16) AOM_AVGSAD_8xHEIGHT_MSA(16) // 8x8 AOM_SAD_8xHEIGHT_MSA(8) -AOM_SAD_8xHEIGHTx3_MSA(8) -AOM_SAD_8xHEIGHTx8_MSA(8) AOM_SAD_8xHEIGHTx4D_MSA(8) AOM_AVGSAD_8xHEIGHT_MSA(8) // 8x4 AOM_SAD_8xHEIGHT_MSA(4) -AOM_SAD_8xHEIGHTx3_MSA(4) -AOM_SAD_8xHEIGHTx8_MSA(4) AOM_SAD_8xHEIGHTx4D_MSA(4) AOM_AVGSAD_8xHEIGHT_MSA(4) // 4x8 AOM_SAD_4xHEIGHT_MSA(8) -AOM_SAD_4xHEIGHTx3_MSA(8) -AOM_SAD_4xHEIGHTx8_MSA(8) AOM_SAD_4xHEIGHTx4D_MSA(8) AOM_AVGSAD_4xHEIGHT_MSA(8) // 4x4 AOM_SAD_4xHEIGHT_MSA(4) -AOM_SAD_4xHEIGHTx3_MSA(4) -AOM_SAD_4xHEIGHTx8_MSA(4) AOM_SAD_4xHEIGHTx4D_MSA(4) AOM_AVGSAD_4xHEIGHT_MSA(4) /* clang-format on */
diff --git a/aom_dsp/sad.c b/aom_dsp/sad.c index 47b657c..933b177 100644 --- a/aom_dsp/sad.c +++ b/aom_dsp/sad.c
@@ -61,19 +61,7 @@ return sad(src, src_stride, comp_pred, m, m, n); \ } -// depending on call sites, pass **ref_array to avoid & in subsequent call and -// de-dup with 4D below. -#define sadMxNxK(m, n, k) \ - void aom_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref_array, int ref_stride, \ - uint32_t *sad_array) { \ - int i; \ - for (i = 0; i < k; ++i) \ - sad_array[i] = \ - aom_sad##m##x##n##_c(src, src_stride, &ref_array[i], ref_stride); \ - } - -// This appears to be equivalent to the above when k == 4 and refs is const +// Calculate sad against 4 reference locations and store each in sad_array #define sadMxNx4D(m, n) \ void aom_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ const uint8_t *const ref_array[], \ @@ -88,8 +76,6 @@ #if CONFIG_AV1 // 128x128 sadMxN(128, 128) -sadMxNxK(128, 128, 3) -sadMxNxK(128, 128, 8) sadMxNx4D(128, 128) // 128x64 @@ -103,8 +89,6 @@ // 64x64 sadMxN(64, 64) -sadMxNxK(64, 64, 3) -sadMxNxK(64, 64, 8) sadMxNx4D(64, 64) // 64x32 @@ -117,8 +101,6 @@ // 32x32 sadMxN(32, 32) -sadMxNxK(32, 32, 3) -sadMxNxK(32, 32, 8) sadMxNx4D(32, 32) // 32x16 @@ -131,42 +113,30 @@ // 16x16 sadMxN(16, 16) -sadMxNxK(16, 16, 3) -sadMxNxK(16, 16, 8) sadMxNx4D(16, 16) // 16x8 sadMxN(16, 8) -sadMxNxK(16, 8, 3) -sadMxNxK(16, 8, 8) sadMxNx4D(16, 8) // 8x16 sadMxN(8, 16) -sadMxNxK(8, 16, 3) -sadMxNxK(8, 16, 8) sadMxNx4D(8, 16) // 8x8 sadMxN(8, 8) -sadMxNxK(8, 8, 3) -sadMxNxK(8, 8, 8) sadMxNx4D(8, 8) // 8x4 sadMxN(8, 4) -sadMxNxK(8, 4, 8) sadMxNx4D(8, 4) // 4x8 sadMxN(4, 8) -sadMxNxK(4, 8, 8) sadMxNx4D(4, 8) // 4x4 sadMxN(4, 4) -sadMxNxK(4, 4, 3) -sadMxNxK(4, 4, 8) sadMxNx4D(4, 4) sadMxh(128); @@ -245,17 +215,6 @@ return highbd_sadb(src, src_stride, comp_pred, m, m, n); \ } -#define highbd_sadMxNxK(m, n, k) \ - void aom_highbd_sad##m##x##n##x##k##_c( \ - const uint8_t *src, int src_stride, const uint8_t *ref_array, \ - int ref_stride, uint32_t *sad_array) { \ - int i; \ - for (i = 0; i < k; ++i) { \ - sad_array[i] = 
aom_highbd_sad##m##x##n##_c(src, src_stride, \ - &ref_array[i], ref_stride); \ - } \ - } - #define highbd_sadMxNx4D(m, n) \ void aom_highbd_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ const uint8_t *const ref_array[], \ @@ -271,8 +230,6 @@ #if CONFIG_AV1 // 128x128 highbd_sadMxN(128, 128) -highbd_sadMxNxK(128, 128, 3) -highbd_sadMxNxK(128, 128, 8) highbd_sadMxNx4D(128, 128) // 128x64 @@ -286,8 +243,6 @@ // 64x64 highbd_sadMxN(64, 64) -highbd_sadMxNxK(64, 64, 3) -highbd_sadMxNxK(64, 64, 8) highbd_sadMxNx4D(64, 64) // 64x32 @@ -300,8 +255,6 @@ // 32x32 highbd_sadMxN(32, 32) -highbd_sadMxNxK(32, 32, 3) -highbd_sadMxNxK(32, 32, 8) highbd_sadMxNx4D(32, 32) // 32x16 @@ -314,42 +267,30 @@ // 16x16 highbd_sadMxN(16, 16) -highbd_sadMxNxK(16, 16, 3) -highbd_sadMxNxK(16, 16, 8) highbd_sadMxNx4D(16, 16) // 16x8 highbd_sadMxN(16, 8) -highbd_sadMxNxK(16, 8, 3) -highbd_sadMxNxK(16, 8, 8) highbd_sadMxNx4D(16, 8) // 8x16 highbd_sadMxN(8, 16) -highbd_sadMxNxK(8, 16, 3) -highbd_sadMxNxK(8, 16, 8) highbd_sadMxNx4D(8, 16) // 8x8 highbd_sadMxN(8, 8) -highbd_sadMxNxK(8, 8, 3) -highbd_sadMxNxK(8, 8, 8) highbd_sadMxNx4D(8, 8) // 8x4 highbd_sadMxN(8, 4) -highbd_sadMxNxK(8, 4, 8) highbd_sadMxNx4D(8, 4) // 4x8 highbd_sadMxN(4, 8) -highbd_sadMxNxK(4, 8, 8) highbd_sadMxNx4D(4, 8) // 4x4 highbd_sadMxN(4, 4) -highbd_sadMxNxK(4, 4, 3) -highbd_sadMxNxK(4, 4, 8) highbd_sadMxNx4D(4, 4) #if CONFIG_AV1
diff --git a/aom_dsp/variance.h b/aom_dsp/variance.h index ab21799..cf9d124 100644 --- a/aom_dsp/variance.h +++ b/aom_dsp/variance.h
@@ -33,10 +33,6 @@ typedef void (*aom_copy32xn_fn_t)(const uint8_t *a, int a_stride, uint8_t *b, int b_stride, int n); -typedef void (*aom_sad_multi_fn_t)(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sad_array); - typedef void (*aom_sad_multi_d_fn_t)(const uint8_t *a, int a_stride, const uint8_t *const b_array[], int b_stride, unsigned int *sad_array); @@ -103,8 +99,6 @@ aom_variance_fn_t vf; aom_subpixvariance_fn_t svf; aom_subp_avg_variance_fn_t svaf; - aom_sad_multi_fn_t sdx3f; - aom_sad_multi_fn_t sdx8f; aom_sad_multi_d_fn_t sdx4df; aom_masked_sad_fn_t msdf; aom_masked_subpixvariance_fn_t msvf;
diff --git a/aom_dsp/x86/sad_sse3.asm b/aom_dsp/x86/sad_sse3.asm deleted file mode 100644 index 50f7f6a..0000000 --- a/aom_dsp/x86/sad_sse3.asm +++ /dev/null
@@ -1,379 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. -; - -; - -%include "aom_ports/x86_abi_support.asm" - -%macro STACK_FRAME_CREATE_X3 0 -%if ABI_IS_32BIT - %define src_ptr rsi - %define src_stride rax - %define ref_ptr rdi - %define ref_stride rdx - %define end_ptr rcx - %define ret_var rbx - %define result_ptr arg(4) - %define height dword ptr arg(4) - push rbp - mov rbp, rsp - push rsi - push rdi - push rbx - - mov rsi, arg(0) ; src_ptr - mov rdi, arg(2) ; ref_ptr - - movsxd rax, dword ptr arg(1) ; src_stride - movsxd rdx, dword ptr arg(3) ; ref_stride -%else - %if LIBAOM_YASM_WIN64 - SAVE_XMM 7, u - %define src_ptr rcx - %define src_stride rdx - %define ref_ptr r8 - %define ref_stride r9 - %define end_ptr r10 - %define ret_var r11 - %define result_ptr [rsp+xmm_stack_space+8+4*8] - %define height dword ptr [rsp+xmm_stack_space+8+4*8] - %else - %define src_ptr rdi - %define src_stride rsi - %define ref_ptr rdx - %define ref_stride rcx - %define end_ptr r9 - %define ret_var r10 - %define result_ptr r8 - %define height r8 - %endif -%endif - -%endmacro - -%macro STACK_FRAME_DESTROY_X3 0 - %define src_ptr - %define src_stride - %define ref_ptr - %define ref_stride - %define end_ptr - %define ret_var - %define result_ptr - %define height - -%if ABI_IS_32BIT - pop rbx - pop rdi - pop rsi - pop rbp -%else - %if LIBAOM_YASM_WIN64 - RESTORE_XMM - %endif -%endif - ret -%endmacro - -%macro PROCESS_16X2X3 5 -%if %1==0 - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm5, XMMWORD PTR [%3] - lddqu 
xmm6, XMMWORD PTR [%3+1] - lddqu xmm7, XMMWORD PTR [%3+2] - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [%2] - lddqu xmm1, XMMWORD PTR [%3] - lddqu xmm2, XMMWORD PTR [%3+1] - lddqu xmm3, XMMWORD PTR [%3+2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [%2+%4] - lddqu xmm1, XMMWORD PTR [%3+%5] - lddqu xmm2, XMMWORD PTR [%3+%5+1] - lddqu xmm3, XMMWORD PTR [%3+%5+2] - -%if %1==0 || %1==1 - lea %2, [%2+%4*2] - lea %3, [%3+%5*2] -%endif - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_8X2X3 5 -%if %1==0 - movq mm0, QWORD PTR [%2] - movq mm5, QWORD PTR [%3] - movq mm6, QWORD PTR [%3+1] - movq mm7, QWORD PTR [%3+2] - - psadbw mm5, mm0 - psadbw mm6, mm0 - psadbw mm7, mm0 -%else - movq mm0, QWORD PTR [%2] - movq mm1, QWORD PTR [%3] - movq mm2, QWORD PTR [%3+1] - movq mm3, QWORD PTR [%3+2] - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm5, mm1 - paddw mm6, mm2 - paddw mm7, mm3 -%endif - movq mm0, QWORD PTR [%2+%4] - movq mm1, QWORD PTR [%3+%5] - movq mm2, QWORD PTR [%3+%5+1] - movq mm3, QWORD PTR [%3+%5+2] - -%if %1==0 || %1==1 - lea %2, [%2+%4*2] - lea %3, [%3+%5*2] -%endif - - psadbw mm1, mm0 - psadbw mm2, mm0 - psadbw mm3, mm0 - - paddw mm5, mm1 - paddw mm6, mm2 - paddw mm7, mm3 -%endmacro - -SECTION .text - -;void int aom_sad16x16x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad16x16x3_sse3) PRIVATE -sym(aom_sad16x16x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 
1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+8], xmm0 - - STACK_FRAME_DESTROY_X3 - -;void int aom_sad16x8x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad16x8x3_sse3) PRIVATE -sym(aom_sad16x8x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rcx], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rcx+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rcx+8], xmm0 - - STACK_FRAME_DESTROY_X3 - -;void int aom_sad8x16x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad8x16x3_sse3) PRIVATE -sym(aom_sad8x16x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, 
ref_stride - - mov rcx, result_ptr - - punpckldq mm5, mm6 - - movq [rcx], mm5 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3 - -;void int aom_sad8x8x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad8x8x3_sse3) PRIVATE -sym(aom_sad8x8x3_sse3): - - STACK_FRAME_CREATE_X3 - - PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - - mov rcx, result_ptr - - punpckldq mm5, mm6 - - movq [rcx], mm5 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3 - -;void int aom_sad4x4x3_sse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad4x4x3_sse3) PRIVATE -sym(aom_sad4x4x3_sse3): - - STACK_FRAME_CREATE_X3 - - movd mm0, DWORD PTR [src_ptr] - movd mm1, DWORD PTR [ref_ptr] - - movd mm2, DWORD PTR [src_ptr+src_stride] - movd mm3, DWORD PTR [ref_ptr+ref_stride] - - punpcklbw mm0, mm2 - punpcklbw mm1, mm3 - - movd mm4, DWORD PTR [ref_ptr+1] - movd mm5, DWORD PTR [ref_ptr+2] - - movd mm2, DWORD PTR [ref_ptr+ref_stride+1] - movd mm3, DWORD PTR [ref_ptr+ref_stride+2] - - psadbw mm1, mm0 - - punpcklbw mm4, mm2 - punpcklbw mm5, mm3 - - psadbw mm4, mm0 - psadbw mm5, mm0 - - lea src_ptr, [src_ptr+src_stride*2] - lea ref_ptr, [ref_ptr+ref_stride*2] - - movd mm0, DWORD PTR [src_ptr] - movd mm2, DWORD PTR [ref_ptr] - - movd mm3, DWORD PTR [src_ptr+src_stride] - movd mm6, DWORD PTR [ref_ptr+ref_stride] - - punpcklbw mm0, mm3 - punpcklbw mm2, mm6 - - movd mm3, DWORD PTR [ref_ptr+1] - movd mm7, DWORD PTR [ref_ptr+2] - - psadbw mm2, mm0 - - paddw mm1, mm2 - - movd mm2, DWORD PTR [ref_ptr+ref_stride+1] - movd mm6, DWORD PTR [ref_ptr+ref_stride+2] - - punpcklbw mm3, mm2 - punpcklbw mm7, mm6 - - psadbw mm3, mm0 - psadbw mm7, mm0 - - paddw mm3, mm4 - paddw mm7, mm5 
- - mov rcx, result_ptr - - punpckldq mm1, mm3 - - movq [rcx], mm1 - movd [rcx+8], mm7 - - STACK_FRAME_DESTROY_X3
diff --git a/aom_dsp/x86/sad_sse4.asm b/aom_dsp/x86/sad_sse4.asm deleted file mode 100644 index 2f8cd57..0000000 --- a/aom_dsp/x86/sad_sse4.asm +++ /dev/null
@@ -1,364 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. -; - -; - - -%include "aom_ports/x86_abi_support.asm" - -%macro PROCESS_16X2X8 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - movq xmm2, MMWORD PTR [rdi+16] - punpcklqdq xmm1, xmm3 - punpcklqdq xmm3, xmm2 - - movdqa xmm2, xmm1 - mpsadbw xmm1, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm1, xmm2 - paddw xmm1, xmm3 - paddw xmm1, xmm4 -%else - movdqa xmm0, XMMWORD PTR [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - movq xmm2, MMWORD PTR [rdi+16] - punpcklqdq xmm5, xmm3 - punpcklqdq xmm3, xmm2 - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm5, xmm2 - paddw xmm5, xmm3 - paddw xmm5, xmm4 - - paddw xmm1, xmm5 -%endif - movdqa xmm0, XMMWORD PTR [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, MMWORD PTR [rdi+ rdx+8] - movq xmm2, MMWORD PTR [rdi+ rdx+16] - punpcklqdq xmm5, xmm3 - punpcklqdq xmm3, xmm2 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - - psrldq xmm0, 8 - movdqa xmm4, xmm3 - mpsadbw xmm3, xmm0, 0x0 - mpsadbw xmm4, xmm0, 0x5 - - paddw xmm5, xmm2 - paddw xmm5, xmm3 - paddw xmm5, xmm4 - - paddw xmm1, xmm5 -%endmacro - -%macro PROCESS_8X2X8 1 
-%if %1 - movq xmm0, MMWORD PTR [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm1, xmm3 - - movdqa xmm2, xmm1 - mpsadbw xmm1, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm1, xmm2 -%else - movq xmm0, MMWORD PTR [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm5, xmm3 - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm5, xmm2 - - paddw xmm1, xmm5 -%endif - movq xmm0, MMWORD PTR [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, MMWORD PTR [rdi+ rdx+8] - punpcklqdq xmm5, xmm3 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movdqa xmm2, xmm5 - mpsadbw xmm5, xmm0, 0x0 - mpsadbw xmm2, xmm0, 0x5 - paddw xmm5, xmm2 - - paddw xmm1, xmm5 -%endmacro - -%macro PROCESS_4X2X8 1 -%if %1 - movd xmm0, [rsi] - movq xmm1, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm1, xmm3 - - mpsadbw xmm1, xmm0, 0x0 -%else - movd xmm0, [rsi] - movq xmm5, MMWORD PTR [rdi] - movq xmm3, MMWORD PTR [rdi+8] - punpcklqdq xmm5, xmm3 - - mpsadbw xmm5, xmm0, 0x0 - - paddw xmm1, xmm5 -%endif - movd xmm0, [rsi + rax] - movq xmm5, MMWORD PTR [rdi+ rdx] - movq xmm3, MMWORD PTR [rdi+ rdx+8] - punpcklqdq xmm5, xmm3 - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - mpsadbw xmm5, xmm0, 0x0 - - paddw xmm1, xmm5 -%endmacro - -%macro WRITE_AS_INTS 0 - mov rdi, arg(4) ;Results - pxor xmm0, xmm0 - movdqa xmm2, xmm1 - punpcklwd xmm1, xmm0 - punpckhwd xmm2, xmm0 - - movdqa [rdi], xmm1 - movdqa [rdi + 16], xmm2 -%endmacro - -SECTION .text - -;void aom_sad16x16x8_sse4_1( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array); -global sym(aom_sad16x16x8_sse4_1) PRIVATE -sym(aom_sad16x16x8_sse4_1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr 
arg(3) ;ref_stride - - PROCESS_16X2X8 1 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - - WRITE_AS_INTS - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void aom_sad16x8x8_sse4_1( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(aom_sad16x8x8_sse4_1) PRIVATE -sym(aom_sad16x8x8_sse4_1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_16X2X8 1 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - PROCESS_16X2X8 0 - - WRITE_AS_INTS - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void aom_sad8x8x8_sse4_1( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(aom_sad8x8x8_sse4_1) PRIVATE -sym(aom_sad8x8x8_sse4_1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_8X2X8 1 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - - WRITE_AS_INTS - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void aom_sad8x16x8_sse4_1( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(aom_sad8x16x8_sse4_1) PRIVATE -sym(aom_sad8x16x8_sse4_1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) 
;ref_stride - - PROCESS_8X2X8 1 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - PROCESS_8X2X8 0 - - WRITE_AS_INTS - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - -;void aom_sad4x4x8_sse4_1( -; const unsigned char *src_ptr, -; int src_stride, -; const unsigned char *ref_ptr, -; int ref_stride, -; unsigned short *sad_array -;); -global sym(aom_sad4x4x8_sse4_1) PRIVATE -sym(aom_sad4x4x8_sse4_1): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - PROCESS_4X2X8 1 - PROCESS_4X2X8 0 - - WRITE_AS_INTS - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - - - -
diff --git a/aom_dsp/x86/sad_ssse3.asm b/aom_dsp/x86/sad_ssse3.asm deleted file mode 100644 index 078a9f2..0000000 --- a/aom_dsp/x86/sad_ssse3.asm +++ /dev/null
@@ -1,375 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. -; - -; - - -%include "aom_ports/x86_abi_support.asm" - -%macro PROCESS_16X2X3 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm5, XMMWORD PTR [rdi] - lddqu xmm6, XMMWORD PTR [rdi+1] - lddqu xmm7, XMMWORD PTR [rdi+2] - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm1, XMMWORD PTR [rdi] - lddqu xmm2, XMMWORD PTR [rdi+1] - lddqu xmm3, XMMWORD PTR [rdi+2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - lddqu xmm1, XMMWORD PTR [rdi+rdx] - lddqu xmm2, XMMWORD PTR [rdi+rdx+1] - lddqu xmm3, XMMWORD PTR [rdi+rdx+2] - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_16X2X3_OFFSET 2 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm4, XMMWORD PTR [rdi] - movdqa xmm7, XMMWORD PTR [rdi+16] - - movdqa xmm5, xmm7 - palignr xmm5, xmm4, %2 - - movdqa xmm6, xmm7 - palignr xmm6, xmm4, (%2+1) - - palignr xmm7, xmm4, (%2+2) - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm4, XMMWORD PTR [rdi] - movdqa xmm3, XMMWORD PTR [rdi+16] - - movdqa xmm1, xmm3 - palignr xmm1, xmm4, %2 - - movdqa xmm2, xmm3 - palignr xmm2, xmm4, (%2+1) - - palignr xmm3, xmm4, (%2+2) - 
- psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - movdqa xmm4, XMMWORD PTR [rdi+rdx] - movdqa xmm3, XMMWORD PTR [rdi+rdx+16] - - movdqa xmm1, xmm3 - palignr xmm1, xmm4, %2 - - movdqa xmm2, xmm3 - palignr xmm2, xmm4, (%2+1) - - palignr xmm3, xmm4, (%2+2) - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_16X16X3_OFFSET 2 -%2_aligned_by_%1: - - sub rdi, %1 - - PROCESS_16X2X3_OFFSET 1, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - - jmp %2_store_off - -%endmacro - -%macro PROCESS_16X8X3_OFFSET 2 -%2_aligned_by_%1: - - sub rdi, %1 - - PROCESS_16X2X3_OFFSET 1, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - - jmp %2_store_off - -%endmacro - -SECTION .text - -;void int aom_sad16x16x3_ssse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad16x16x3_ssse3) PRIVATE -sym(aom_sad16x16x3_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rcx - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rdx, 0xf - and rdx, rdi - - jmp .aom_sad16x16x3_ssse3_skiptable -.aom_sad16x16x3_ssse3_jumptable: - dd .aom_sad16x16x3_ssse3_aligned_by_0 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_1 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_2 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_3 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_4 - .aom_sad16x16x3_ssse3_do_jump - dd 
.aom_sad16x16x3_ssse3_aligned_by_5 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_6 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_7 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_8 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_9 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_10 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_11 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_12 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_13 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_14 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_15 - .aom_sad16x16x3_ssse3_do_jump -.aom_sad16x16x3_ssse3_skiptable: - - call .aom_sad16x16x3_ssse3_do_jump -.aom_sad16x16x3_ssse3_do_jump: - pop rcx ; get the address of do_jump - mov rax, .aom_sad16x16x3_ssse3_jumptable - .aom_sad16x16x3_ssse3_do_jump - add rax, rcx ; get the absolute address of aom_sad16x16x3_ssse3_jumptable - - movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable - add rcx, rax - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - jmp rcx - - PROCESS_16X16X3_OFFSET 0, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 1, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 2, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 3, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 4, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 5, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 6, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 7, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 8, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 9, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 10, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 11, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 12, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 13, .aom_sad16x16x3_ssse3 - 
PROCESS_16X16X3_OFFSET 14, .aom_sad16x16x3_ssse3 - -.aom_sad16x16x3_ssse3_aligned_by_15: - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - -.aom_sad16x16x3_ssse3_store_off: - mov rdi, arg(4) ;Results - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rdi], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rdi+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rdi+8], xmm0 - - ; begin epilog - pop rcx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void int aom_sad16x8x3_ssse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad16x8x3_ssse3) PRIVATE -sym(aom_sad16x8x3_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rcx - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rdx, 0xf - and rdx, rdi - - jmp .aom_sad16x8x3_ssse3_skiptable -.aom_sad16x8x3_ssse3_jumptable: - dd .aom_sad16x8x3_ssse3_aligned_by_0 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_1 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_2 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_3 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_4 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_5 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_6 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_7 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_8 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_9 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_10 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_11 - .aom_sad16x8x3_ssse3_do_jump - dd 
.aom_sad16x8x3_ssse3_aligned_by_12 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_13 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_14 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_15 - .aom_sad16x8x3_ssse3_do_jump -.aom_sad16x8x3_ssse3_skiptable: - - call .aom_sad16x8x3_ssse3_do_jump -.aom_sad16x8x3_ssse3_do_jump: - pop rcx ; get the address of do_jump - mov rax, .aom_sad16x8x3_ssse3_jumptable - .aom_sad16x8x3_ssse3_do_jump - add rax, rcx ; get the absolute address of aom_sad16x8x3_ssse3_jumptable - - movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable - add rcx, rax - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - jmp rcx - - PROCESS_16X8X3_OFFSET 0, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 1, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 2, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 3, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 4, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 5, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 6, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 7, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 8, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 9, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 10, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 11, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 12, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 13, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 14, .aom_sad16x8x3_ssse3 - -.aom_sad16x8x3_ssse3_aligned_by_15: - - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - -.aom_sad16x8x3_ssse3_store_off: - mov rdi, arg(4) ;Results - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rdi], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rdi+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rdi+8], xmm0 - - ; begin epilog - pop rcx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - 
ret
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index e0ca707..2fd4303 100755 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl
@@ -206,11 +206,6 @@ # # Motion search # - add_proto qw/int av1_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv"; - specialize qw/av1_full_search_sad sse3 sse4_1/; - $av1_full_search_sad_sse3=av1_full_search_sadx3; - $av1_full_search_sad_sse4_1=av1_full_search_sadx8; - add_proto qw/int av1_diamond_search_sad/, "struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv"; add_proto qw/int av1_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct aom_variance_vtable *fn_ptr, const struct mv *center_mv";
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 0f3c265..f15ad19 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -1004,17 +1004,14 @@ (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000; } -#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF, JSDAF, \ - JSVAF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx3f = SDX3F; \ - cpi->fn_ptr[BT].sdx8f = SDX8F; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; \ - cpi->fn_ptr[BT].jsdaf = JSDAF; \ +#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \ + cpi->fn_ptr[BT].sdf = SDF; \ + cpi->fn_ptr[BT].sdaf = SDAF; \ + cpi->fn_ptr[BT].vf = VF; \ + cpi->fn_ptr[BT].svf = SVF; \ + cpi->fn_ptr[BT].svaf = SVAF; \ + cpi->fn_ptr[BT].sdx4df = SDX4DF; \ + cpi->fn_ptr[BT].jsdaf = JSDAF; \ cpi->fn_ptr[BT].jsvaf = JSVAF; #define MAKE_BFP_SAD_WRAPPER(fnname) \ @@ -1053,47 +1050,6 @@ 4; \ } -#define MAKE_BFP_SAD3_WRAPPER(fnname) \ - static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *ref_ptr, int ref_stride, \ - unsigned int *sad_array) { \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - } \ - static void fnname##_bits10(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *ref_ptr, int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 3; i++) sad_array[i] >>= 2; \ - } \ - static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *ref_ptr, int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 3; i++) sad_array[i] >>= 4; \ - } - -#define MAKE_BFP_SAD8_WRAPPER(fnname) \ - static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *ref_ptr, int ref_stride, \ - unsigned int *sad_array) { \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - } \ - static void 
fnname##_bits10(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *ref_ptr, int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 8; i++) sad_array[i] >>= 2; \ - } \ - static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *ref_ptr, int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 8; i++) sad_array[i] >>= 4; \ - } #define MAKE_BFP_SAD4D_WRAPPER(fnname) \ static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \ const uint8_t *const ref_ptr[], int ref_stride, \ @@ -1142,8 +1098,6 @@ MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x128) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x128_avg) -MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad128x128x3) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad128x128x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x128x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x64) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x64_avg) @@ -1165,46 +1119,30 @@ MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x64x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x32) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x32_avg) -MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad32x32x3) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad32x32x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x32x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x64) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x64_avg) -MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad64x64x3) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad64x64x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x64x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x16) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x16_avg) -MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad16x16x3) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad16x16x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x16x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x8) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x8_avg) -MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad16x8x3) 
-MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad16x8x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x8x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x16) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x16_avg) -MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad8x16x3) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad8x16x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x16x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x8) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x8_avg) -MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad8x8x3) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad8x8x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x8x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x4) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x4_avg) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad8x4x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x4x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x8) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x8_avg) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad4x8x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x8x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x4) MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x4_avg) -MAKE_BFP_SAD3_WRAPPER(aom_highbd_sad4x4x3) -MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad4x4x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d) MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x16) @@ -1354,7 +1292,7 @@ HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits8, aom_highbd_sad64x16_avg_bits8, aom_highbd_8_variance64x16, aom_highbd_8_sub_pixel_variance64x16, - aom_highbd_8_sub_pixel_avg_variance64x16, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance64x16, aom_highbd_sad64x16x4d_bits8, aom_highbd_jnt_sad64x16_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance64x16) @@ -1362,7 +1300,7 @@ HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits8, aom_highbd_sad16x64_avg_bits8, aom_highbd_8_variance16x64, aom_highbd_8_sub_pixel_variance16x64, - aom_highbd_8_sub_pixel_avg_variance16x64, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance16x64, aom_highbd_sad16x64x4d_bits8, aom_highbd_jnt_sad16x64_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance16x64) @@ -1370,35 +1308,35 @@ HIGHBD_BFP( BLOCK_32X8, aom_highbd_sad32x8_bits8, 
aom_highbd_sad32x8_avg_bits8, aom_highbd_8_variance32x8, aom_highbd_8_sub_pixel_variance32x8, - aom_highbd_8_sub_pixel_avg_variance32x8, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance32x8, aom_highbd_sad32x8x4d_bits8, aom_highbd_jnt_sad32x8_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance32x8) HIGHBD_BFP( BLOCK_8X32, aom_highbd_sad8x32_bits8, aom_highbd_sad8x32_avg_bits8, aom_highbd_8_variance8x32, aom_highbd_8_sub_pixel_variance8x32, - aom_highbd_8_sub_pixel_avg_variance8x32, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance8x32, aom_highbd_sad8x32x4d_bits8, aom_highbd_jnt_sad8x32_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance8x32) HIGHBD_BFP( BLOCK_16X4, aom_highbd_sad16x4_bits8, aom_highbd_sad16x4_avg_bits8, aom_highbd_8_variance16x4, aom_highbd_8_sub_pixel_variance16x4, - aom_highbd_8_sub_pixel_avg_variance16x4, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance16x4, aom_highbd_sad16x4x4d_bits8, aom_highbd_jnt_sad16x4_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance16x4) HIGHBD_BFP( BLOCK_4X16, aom_highbd_sad4x16_bits8, aom_highbd_sad4x16_avg_bits8, aom_highbd_8_variance4x16, aom_highbd_8_sub_pixel_variance4x16, - aom_highbd_8_sub_pixel_avg_variance4x16, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance4x16, aom_highbd_sad4x16x4d_bits8, aom_highbd_jnt_sad4x16_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance4x16) HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits8, aom_highbd_sad32x16_avg_bits8, aom_highbd_8_variance32x16, aom_highbd_8_sub_pixel_variance32x16, - aom_highbd_8_sub_pixel_avg_variance32x16, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance32x16, aom_highbd_sad32x16x4d_bits8, aom_highbd_jnt_sad32x16_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance32x16) @@ -1406,7 +1344,7 @@ HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits8, aom_highbd_sad16x32_avg_bits8, aom_highbd_8_variance16x32, aom_highbd_8_sub_pixel_variance16x32, - aom_highbd_8_sub_pixel_avg_variance16x32, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance16x32, 
aom_highbd_sad16x32x4d_bits8, aom_highbd_jnt_sad16x32_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance16x32) @@ -1414,7 +1352,7 @@ HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits8, aom_highbd_sad64x32_avg_bits8, aom_highbd_8_variance64x32, aom_highbd_8_sub_pixel_variance64x32, - aom_highbd_8_sub_pixel_avg_variance64x32, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance64x32, aom_highbd_sad64x32x4d_bits8, aom_highbd_jnt_sad64x32_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance64x32) @@ -1422,7 +1360,7 @@ HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits8, aom_highbd_sad32x64_avg_bits8, aom_highbd_8_variance32x64, aom_highbd_8_sub_pixel_variance32x64, - aom_highbd_8_sub_pixel_avg_variance32x64, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance32x64, aom_highbd_sad32x64x4d_bits8, aom_highbd_jnt_sad32x64_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance32x64) @@ -1431,7 +1369,6 @@ aom_highbd_sad32x32_avg_bits8, aom_highbd_8_variance32x32, aom_highbd_8_sub_pixel_variance32x32, aom_highbd_8_sub_pixel_avg_variance32x32, - aom_highbd_sad32x32x3_bits8, aom_highbd_sad32x32x8_bits8, aom_highbd_sad32x32x4d_bits8, aom_highbd_jnt_sad32x32_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance32x32) @@ -1440,7 +1377,6 @@ aom_highbd_sad64x64_avg_bits8, aom_highbd_8_variance64x64, aom_highbd_8_sub_pixel_variance64x64, aom_highbd_8_sub_pixel_avg_variance64x64, - aom_highbd_sad64x64x3_bits8, aom_highbd_sad64x64x8_bits8, aom_highbd_sad64x64x4d_bits8, aom_highbd_jnt_sad64x64_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance64x64) @@ -1449,7 +1385,6 @@ aom_highbd_sad16x16_avg_bits8, aom_highbd_8_variance16x16, aom_highbd_8_sub_pixel_variance16x16, aom_highbd_8_sub_pixel_avg_variance16x16, - aom_highbd_sad16x16x3_bits8, aom_highbd_sad16x16x8_bits8, aom_highbd_sad16x16x4d_bits8, aom_highbd_jnt_sad16x16_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance16x16) @@ -1457,48 +1392,42 @@ HIGHBD_BFP( BLOCK_16X8, aom_highbd_sad16x8_bits8, aom_highbd_sad16x8_avg_bits8, aom_highbd_8_variance16x8, 
aom_highbd_8_sub_pixel_variance16x8, - aom_highbd_8_sub_pixel_avg_variance16x8, aom_highbd_sad16x8x3_bits8, - aom_highbd_sad16x8x8_bits8, aom_highbd_sad16x8x4d_bits8, - aom_highbd_jnt_sad16x8_avg_bits8, + aom_highbd_8_sub_pixel_avg_variance16x8, + aom_highbd_sad16x8x4d_bits8, aom_highbd_jnt_sad16x8_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance16x8) HIGHBD_BFP( BLOCK_8X16, aom_highbd_sad8x16_bits8, aom_highbd_sad8x16_avg_bits8, aom_highbd_8_variance8x16, aom_highbd_8_sub_pixel_variance8x16, - aom_highbd_8_sub_pixel_avg_variance8x16, aom_highbd_sad8x16x3_bits8, - aom_highbd_sad8x16x8_bits8, aom_highbd_sad8x16x4d_bits8, - aom_highbd_jnt_sad8x16_avg_bits8, + aom_highbd_8_sub_pixel_avg_variance8x16, + aom_highbd_sad8x16x4d_bits8, aom_highbd_jnt_sad8x16_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance8x16) HIGHBD_BFP(BLOCK_8X8, aom_highbd_sad8x8_bits8, aom_highbd_sad8x8_avg_bits8, aom_highbd_8_variance8x8, aom_highbd_8_sub_pixel_variance8x8, aom_highbd_8_sub_pixel_avg_variance8x8, - aom_highbd_sad8x8x3_bits8, aom_highbd_sad8x8x8_bits8, aom_highbd_sad8x8x4d_bits8, aom_highbd_jnt_sad8x8_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance8x8) HIGHBD_BFP(BLOCK_8X4, aom_highbd_sad8x4_bits8, aom_highbd_sad8x4_avg_bits8, aom_highbd_8_variance8x4, aom_highbd_8_sub_pixel_variance8x4, - aom_highbd_8_sub_pixel_avg_variance8x4, NULL, - aom_highbd_sad8x4x8_bits8, aom_highbd_sad8x4x4d_bits8, - aom_highbd_jnt_sad8x4_avg_bits8, + aom_highbd_8_sub_pixel_avg_variance8x4, + aom_highbd_sad8x4x4d_bits8, aom_highbd_jnt_sad8x4_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance8x4) HIGHBD_BFP(BLOCK_4X8, aom_highbd_sad4x8_bits8, aom_highbd_sad4x8_avg_bits8, aom_highbd_8_variance4x8, aom_highbd_8_sub_pixel_variance4x8, - aom_highbd_8_sub_pixel_avg_variance4x8, NULL, - aom_highbd_sad4x8x8_bits8, aom_highbd_sad4x8x4d_bits8, - aom_highbd_jnt_sad4x8_avg_bits8, + aom_highbd_8_sub_pixel_avg_variance4x8, + aom_highbd_sad4x8x4d_bits8, aom_highbd_jnt_sad4x8_avg_bits8, 
aom_highbd_8_jnt_sub_pixel_avg_variance4x8) HIGHBD_BFP(BLOCK_4X4, aom_highbd_sad4x4_bits8, aom_highbd_sad4x4_avg_bits8, aom_highbd_8_variance4x4, aom_highbd_8_sub_pixel_variance4x4, aom_highbd_8_sub_pixel_avg_variance4x4, - aom_highbd_sad4x4x3_bits8, aom_highbd_sad4x4x8_bits8, aom_highbd_sad4x4x4d_bits8, aom_highbd_jnt_sad4x4_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance4x4) @@ -1507,14 +1436,13 @@ aom_highbd_sad128x128_avg_bits8, aom_highbd_8_variance128x128, aom_highbd_8_sub_pixel_variance128x128, aom_highbd_8_sub_pixel_avg_variance128x128, - aom_highbd_sad128x128x3_bits8, aom_highbd_sad128x128x8_bits8, aom_highbd_sad128x128x4d_bits8, aom_highbd_jnt_sad128x128_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance128x128) HIGHBD_BFP(BLOCK_128X64, aom_highbd_sad128x64_bits8, aom_highbd_sad128x64_avg_bits8, aom_highbd_8_variance128x64, aom_highbd_8_sub_pixel_variance128x64, - aom_highbd_8_sub_pixel_avg_variance128x64, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance128x64, aom_highbd_sad128x64x4d_bits8, aom_highbd_jnt_sad128x64_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance128x64) @@ -1522,7 +1450,7 @@ HIGHBD_BFP(BLOCK_64X128, aom_highbd_sad64x128_bits8, aom_highbd_sad64x128_avg_bits8, aom_highbd_8_variance64x128, aom_highbd_8_sub_pixel_variance64x128, - aom_highbd_8_sub_pixel_avg_variance64x128, NULL, NULL, + aom_highbd_8_sub_pixel_avg_variance64x128, aom_highbd_sad64x128x4d_bits8, aom_highbd_jnt_sad64x128_avg_bits8, aom_highbd_8_jnt_sub_pixel_avg_variance64x128) @@ -1643,7 +1571,7 @@ HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits10, aom_highbd_sad64x16_avg_bits10, aom_highbd_10_variance64x16, aom_highbd_10_sub_pixel_variance64x16, - aom_highbd_10_sub_pixel_avg_variance64x16, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance64x16, aom_highbd_sad64x16x4d_bits10, aom_highbd_jnt_sad64x16_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance64x16); @@ -1651,7 +1579,7 @@ HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits10, aom_highbd_sad16x64_avg_bits10, 
aom_highbd_10_variance16x64, aom_highbd_10_sub_pixel_variance16x64, - aom_highbd_10_sub_pixel_avg_variance16x64, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance16x64, aom_highbd_sad16x64x4d_bits10, aom_highbd_jnt_sad16x64_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance16x64); @@ -1659,7 +1587,7 @@ HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits10, aom_highbd_sad32x8_avg_bits10, aom_highbd_10_variance32x8, aom_highbd_10_sub_pixel_variance32x8, - aom_highbd_10_sub_pixel_avg_variance32x8, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance32x8, aom_highbd_sad32x8x4d_bits10, aom_highbd_jnt_sad32x8_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance32x8); @@ -1667,7 +1595,7 @@ HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits10, aom_highbd_sad8x32_avg_bits10, aom_highbd_10_variance8x32, aom_highbd_10_sub_pixel_variance8x32, - aom_highbd_10_sub_pixel_avg_variance8x32, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance8x32, aom_highbd_sad8x32x4d_bits10, aom_highbd_jnt_sad8x32_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance8x32); @@ -1675,7 +1603,7 @@ HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits10, aom_highbd_sad16x4_avg_bits10, aom_highbd_10_variance16x4, aom_highbd_10_sub_pixel_variance16x4, - aom_highbd_10_sub_pixel_avg_variance16x4, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance16x4, aom_highbd_sad16x4x4d_bits10, aom_highbd_jnt_sad16x4_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance16x4); @@ -1683,7 +1611,7 @@ HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits10, aom_highbd_sad4x16_avg_bits10, aom_highbd_10_variance4x16, aom_highbd_10_sub_pixel_variance4x16, - aom_highbd_10_sub_pixel_avg_variance4x16, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance4x16, aom_highbd_sad4x16x4d_bits10, aom_highbd_jnt_sad4x16_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance4x16); @@ -1691,7 +1619,7 @@ HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits10, aom_highbd_sad32x16_avg_bits10, aom_highbd_10_variance32x16, aom_highbd_10_sub_pixel_variance32x16, - 
aom_highbd_10_sub_pixel_avg_variance32x16, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance32x16, aom_highbd_sad32x16x4d_bits10, aom_highbd_jnt_sad32x16_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance32x16); @@ -1699,7 +1627,7 @@ HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits10, aom_highbd_sad16x32_avg_bits10, aom_highbd_10_variance16x32, aom_highbd_10_sub_pixel_variance16x32, - aom_highbd_10_sub_pixel_avg_variance16x32, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance16x32, aom_highbd_sad16x32x4d_bits10, aom_highbd_jnt_sad16x32_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance16x32); @@ -1707,7 +1635,7 @@ HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits10, aom_highbd_sad64x32_avg_bits10, aom_highbd_10_variance64x32, aom_highbd_10_sub_pixel_variance64x32, - aom_highbd_10_sub_pixel_avg_variance64x32, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance64x32, aom_highbd_sad64x32x4d_bits10, aom_highbd_jnt_sad64x32_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance64x32); @@ -1715,7 +1643,7 @@ HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits10, aom_highbd_sad32x64_avg_bits10, aom_highbd_10_variance32x64, aom_highbd_10_sub_pixel_variance32x64, - aom_highbd_10_sub_pixel_avg_variance32x64, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance32x64, aom_highbd_sad32x64x4d_bits10, aom_highbd_jnt_sad32x64_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance32x64); @@ -1724,7 +1652,6 @@ aom_highbd_sad32x32_avg_bits10, aom_highbd_10_variance32x32, aom_highbd_10_sub_pixel_variance32x32, aom_highbd_10_sub_pixel_avg_variance32x32, - aom_highbd_sad32x32x3_bits10, aom_highbd_sad32x32x8_bits10, aom_highbd_sad32x32x4d_bits10, aom_highbd_jnt_sad32x32_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance32x32); @@ -1733,7 +1660,6 @@ aom_highbd_sad64x64_avg_bits10, aom_highbd_10_variance64x64, aom_highbd_10_sub_pixel_variance64x64, aom_highbd_10_sub_pixel_avg_variance64x64, - aom_highbd_sad64x64x3_bits10, aom_highbd_sad64x64x8_bits10, aom_highbd_sad64x64x4d_bits10, 
aom_highbd_jnt_sad64x64_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance64x64); @@ -1742,7 +1668,6 @@ aom_highbd_sad16x16_avg_bits10, aom_highbd_10_variance16x16, aom_highbd_10_sub_pixel_variance16x16, aom_highbd_10_sub_pixel_avg_variance16x16, - aom_highbd_sad16x16x3_bits10, aom_highbd_sad16x16x8_bits10, aom_highbd_sad16x16x4d_bits10, aom_highbd_jnt_sad16x16_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance16x16); @@ -1751,7 +1676,6 @@ aom_highbd_sad16x8_avg_bits10, aom_highbd_10_variance16x8, aom_highbd_10_sub_pixel_variance16x8, aom_highbd_10_sub_pixel_avg_variance16x8, - aom_highbd_sad16x8x3_bits10, aom_highbd_sad16x8x8_bits10, aom_highbd_sad16x8x4d_bits10, aom_highbd_jnt_sad16x8_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance16x8); @@ -1760,7 +1684,6 @@ aom_highbd_sad8x16_avg_bits10, aom_highbd_10_variance8x16, aom_highbd_10_sub_pixel_variance8x16, aom_highbd_10_sub_pixel_avg_variance8x16, - aom_highbd_sad8x16x3_bits10, aom_highbd_sad8x16x8_bits10, aom_highbd_sad8x16x4d_bits10, aom_highbd_jnt_sad8x16_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance8x16); @@ -1768,50 +1691,45 @@ HIGHBD_BFP( BLOCK_8X8, aom_highbd_sad8x8_bits10, aom_highbd_sad8x8_avg_bits10, aom_highbd_10_variance8x8, aom_highbd_10_sub_pixel_variance8x8, - aom_highbd_10_sub_pixel_avg_variance8x8, aom_highbd_sad8x8x3_bits10, - aom_highbd_sad8x8x8_bits10, aom_highbd_sad8x8x4d_bits10, - aom_highbd_jnt_sad8x8_avg_bits10, + aom_highbd_10_sub_pixel_avg_variance8x8, + aom_highbd_sad8x8x4d_bits10, aom_highbd_jnt_sad8x8_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance8x8); - HIGHBD_BFP(BLOCK_8X4, aom_highbd_sad8x4_bits10, - aom_highbd_sad8x4_avg_bits10, aom_highbd_10_variance8x4, - aom_highbd_10_sub_pixel_variance8x4, - aom_highbd_10_sub_pixel_avg_variance8x4, NULL, - aom_highbd_sad8x4x8_bits10, aom_highbd_sad8x4x4d_bits10, - aom_highbd_jnt_sad8x4_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance8x4); + HIGHBD_BFP( + BLOCK_8X4, aom_highbd_sad8x4_bits10, 
aom_highbd_sad8x4_avg_bits10, + aom_highbd_10_variance8x4, aom_highbd_10_sub_pixel_variance8x4, + aom_highbd_10_sub_pixel_avg_variance8x4, + aom_highbd_sad8x4x4d_bits10, aom_highbd_jnt_sad8x4_avg_bits10, + aom_highbd_10_jnt_sub_pixel_avg_variance8x4); - HIGHBD_BFP(BLOCK_4X8, aom_highbd_sad4x8_bits10, - aom_highbd_sad4x8_avg_bits10, aom_highbd_10_variance4x8, - aom_highbd_10_sub_pixel_variance4x8, - aom_highbd_10_sub_pixel_avg_variance4x8, NULL, - aom_highbd_sad4x8x8_bits10, aom_highbd_sad4x8x4d_bits10, - aom_highbd_jnt_sad4x8_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance4x8); + HIGHBD_BFP( + BLOCK_4X8, aom_highbd_sad4x8_bits10, aom_highbd_sad4x8_avg_bits10, + aom_highbd_10_variance4x8, aom_highbd_10_sub_pixel_variance4x8, + aom_highbd_10_sub_pixel_avg_variance4x8, + aom_highbd_sad4x8x4d_bits10, aom_highbd_jnt_sad4x8_avg_bits10, + aom_highbd_10_jnt_sub_pixel_avg_variance4x8); HIGHBD_BFP( BLOCK_4X4, aom_highbd_sad4x4_bits10, aom_highbd_sad4x4_avg_bits10, aom_highbd_10_variance4x4, aom_highbd_10_sub_pixel_variance4x4, - aom_highbd_10_sub_pixel_avg_variance4x4, aom_highbd_sad4x4x3_bits10, - aom_highbd_sad4x4x8_bits10, aom_highbd_sad4x4x4d_bits10, - aom_highbd_jnt_sad4x4_avg_bits10, + aom_highbd_10_sub_pixel_avg_variance4x4, + aom_highbd_sad4x4x4d_bits10, aom_highbd_jnt_sad4x4_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance4x4); - HIGHBD_BFP( - BLOCK_128X128, aom_highbd_sad128x128_bits10, - aom_highbd_sad128x128_avg_bits10, aom_highbd_10_variance128x128, - aom_highbd_10_sub_pixel_variance128x128, - aom_highbd_10_sub_pixel_avg_variance128x128, - aom_highbd_sad128x128x3_bits10, aom_highbd_sad128x128x8_bits10, - aom_highbd_sad128x128x4d_bits10, - aom_highbd_jnt_sad128x128_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance128x128); + HIGHBD_BFP(BLOCK_128X128, aom_highbd_sad128x128_bits10, + aom_highbd_sad128x128_avg_bits10, + aom_highbd_10_variance128x128, + aom_highbd_10_sub_pixel_variance128x128, + aom_highbd_10_sub_pixel_avg_variance128x128, + 
aom_highbd_sad128x128x4d_bits10, + aom_highbd_jnt_sad128x128_avg_bits10, + aom_highbd_10_jnt_sub_pixel_avg_variance128x128); HIGHBD_BFP( BLOCK_128X64, aom_highbd_sad128x64_bits10, aom_highbd_sad128x64_avg_bits10, aom_highbd_10_variance128x64, aom_highbd_10_sub_pixel_variance128x64, - aom_highbd_10_sub_pixel_avg_variance128x64, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance128x64, aom_highbd_sad128x64x4d_bits10, aom_highbd_jnt_sad128x64_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance128x64); @@ -1819,7 +1737,7 @@ BLOCK_64X128, aom_highbd_sad64x128_bits10, aom_highbd_sad64x128_avg_bits10, aom_highbd_10_variance64x128, aom_highbd_10_sub_pixel_variance64x128, - aom_highbd_10_sub_pixel_avg_variance64x128, NULL, NULL, + aom_highbd_10_sub_pixel_avg_variance64x128, aom_highbd_sad64x128x4d_bits10, aom_highbd_jnt_sad64x128_avg_bits10, aom_highbd_10_jnt_sub_pixel_avg_variance64x128); @@ -1945,7 +1863,7 @@ HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits12, aom_highbd_sad64x16_avg_bits12, aom_highbd_12_variance64x16, aom_highbd_12_sub_pixel_variance64x16, - aom_highbd_12_sub_pixel_avg_variance64x16, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance64x16, aom_highbd_sad64x16x4d_bits12, aom_highbd_jnt_sad64x16_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance64x16); @@ -1953,7 +1871,7 @@ HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits12, aom_highbd_sad16x64_avg_bits12, aom_highbd_12_variance16x64, aom_highbd_12_sub_pixel_variance16x64, - aom_highbd_12_sub_pixel_avg_variance16x64, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance16x64, aom_highbd_sad16x64x4d_bits12, aom_highbd_jnt_sad16x64_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance16x64); @@ -1961,7 +1879,7 @@ HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits12, aom_highbd_sad32x8_avg_bits12, aom_highbd_12_variance32x8, aom_highbd_12_sub_pixel_variance32x8, - aom_highbd_12_sub_pixel_avg_variance32x8, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance32x8, aom_highbd_sad32x8x4d_bits12, 
aom_highbd_jnt_sad32x8_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance32x8); @@ -1969,7 +1887,7 @@ HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits12, aom_highbd_sad8x32_avg_bits12, aom_highbd_12_variance8x32, aom_highbd_12_sub_pixel_variance8x32, - aom_highbd_12_sub_pixel_avg_variance8x32, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance8x32, aom_highbd_sad8x32x4d_bits12, aom_highbd_jnt_sad8x32_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance8x32); @@ -1977,7 +1895,7 @@ HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits12, aom_highbd_sad16x4_avg_bits12, aom_highbd_12_variance16x4, aom_highbd_12_sub_pixel_variance16x4, - aom_highbd_12_sub_pixel_avg_variance16x4, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance16x4, aom_highbd_sad16x4x4d_bits12, aom_highbd_jnt_sad16x4_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance16x4); @@ -1985,7 +1903,7 @@ HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits12, aom_highbd_sad4x16_avg_bits12, aom_highbd_12_variance4x16, aom_highbd_12_sub_pixel_variance4x16, - aom_highbd_12_sub_pixel_avg_variance4x16, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance4x16, aom_highbd_sad4x16x4d_bits12, aom_highbd_jnt_sad4x16_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance4x16); @@ -1993,7 +1911,7 @@ HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits12, aom_highbd_sad32x16_avg_bits12, aom_highbd_12_variance32x16, aom_highbd_12_sub_pixel_variance32x16, - aom_highbd_12_sub_pixel_avg_variance32x16, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance32x16, aom_highbd_sad32x16x4d_bits12, aom_highbd_jnt_sad32x16_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance32x16); @@ -2001,7 +1919,7 @@ HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits12, aom_highbd_sad16x32_avg_bits12, aom_highbd_12_variance16x32, aom_highbd_12_sub_pixel_variance16x32, - aom_highbd_12_sub_pixel_avg_variance16x32, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance16x32, aom_highbd_sad16x32x4d_bits12, aom_highbd_jnt_sad16x32_avg_bits12, 
aom_highbd_12_jnt_sub_pixel_avg_variance16x32); @@ -2009,7 +1927,7 @@ HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits12, aom_highbd_sad64x32_avg_bits12, aom_highbd_12_variance64x32, aom_highbd_12_sub_pixel_variance64x32, - aom_highbd_12_sub_pixel_avg_variance64x32, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance64x32, aom_highbd_sad64x32x4d_bits12, aom_highbd_jnt_sad64x32_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance64x32); @@ -2017,7 +1935,7 @@ HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits12, aom_highbd_sad32x64_avg_bits12, aom_highbd_12_variance32x64, aom_highbd_12_sub_pixel_variance32x64, - aom_highbd_12_sub_pixel_avg_variance32x64, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance32x64, aom_highbd_sad32x64x4d_bits12, aom_highbd_jnt_sad32x64_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance32x64); @@ -2026,7 +1944,6 @@ aom_highbd_sad32x32_avg_bits12, aom_highbd_12_variance32x32, aom_highbd_12_sub_pixel_variance32x32, aom_highbd_12_sub_pixel_avg_variance32x32, - aom_highbd_sad32x32x3_bits12, aom_highbd_sad32x32x8_bits12, aom_highbd_sad32x32x4d_bits12, aom_highbd_jnt_sad32x32_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance32x32); @@ -2035,7 +1952,6 @@ aom_highbd_sad64x64_avg_bits12, aom_highbd_12_variance64x64, aom_highbd_12_sub_pixel_variance64x64, aom_highbd_12_sub_pixel_avg_variance64x64, - aom_highbd_sad64x64x3_bits12, aom_highbd_sad64x64x8_bits12, aom_highbd_sad64x64x4d_bits12, aom_highbd_jnt_sad64x64_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance64x64); @@ -2044,7 +1960,6 @@ aom_highbd_sad16x16_avg_bits12, aom_highbd_12_variance16x16, aom_highbd_12_sub_pixel_variance16x16, aom_highbd_12_sub_pixel_avg_variance16x16, - aom_highbd_sad16x16x3_bits12, aom_highbd_sad16x16x8_bits12, aom_highbd_sad16x16x4d_bits12, aom_highbd_jnt_sad16x16_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance16x16); @@ -2053,7 +1968,6 @@ aom_highbd_sad16x8_avg_bits12, aom_highbd_12_variance16x8, aom_highbd_12_sub_pixel_variance16x8, 
aom_highbd_12_sub_pixel_avg_variance16x8, - aom_highbd_sad16x8x3_bits12, aom_highbd_sad16x8x8_bits12, aom_highbd_sad16x8x4d_bits12, aom_highbd_jnt_sad16x8_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance16x8); @@ -2062,7 +1976,6 @@ aom_highbd_sad8x16_avg_bits12, aom_highbd_12_variance8x16, aom_highbd_12_sub_pixel_variance8x16, aom_highbd_12_sub_pixel_avg_variance8x16, - aom_highbd_sad8x16x3_bits12, aom_highbd_sad8x16x8_bits12, aom_highbd_sad8x16x4d_bits12, aom_highbd_jnt_sad8x16_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance8x16); @@ -2070,50 +1983,45 @@ HIGHBD_BFP( BLOCK_8X8, aom_highbd_sad8x8_bits12, aom_highbd_sad8x8_avg_bits12, aom_highbd_12_variance8x8, aom_highbd_12_sub_pixel_variance8x8, - aom_highbd_12_sub_pixel_avg_variance8x8, aom_highbd_sad8x8x3_bits12, - aom_highbd_sad8x8x8_bits12, aom_highbd_sad8x8x4d_bits12, - aom_highbd_jnt_sad8x8_avg_bits12, + aom_highbd_12_sub_pixel_avg_variance8x8, + aom_highbd_sad8x8x4d_bits12, aom_highbd_jnt_sad8x8_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance8x8); - HIGHBD_BFP(BLOCK_8X4, aom_highbd_sad8x4_bits12, - aom_highbd_sad8x4_avg_bits12, aom_highbd_12_variance8x4, - aom_highbd_12_sub_pixel_variance8x4, - aom_highbd_12_sub_pixel_avg_variance8x4, NULL, - aom_highbd_sad8x4x8_bits12, aom_highbd_sad8x4x4d_bits12, - aom_highbd_jnt_sad8x4_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance8x4); + HIGHBD_BFP( + BLOCK_8X4, aom_highbd_sad8x4_bits12, aom_highbd_sad8x4_avg_bits12, + aom_highbd_12_variance8x4, aom_highbd_12_sub_pixel_variance8x4, + aom_highbd_12_sub_pixel_avg_variance8x4, + aom_highbd_sad8x4x4d_bits12, aom_highbd_jnt_sad8x4_avg_bits12, + aom_highbd_12_jnt_sub_pixel_avg_variance8x4); - HIGHBD_BFP(BLOCK_4X8, aom_highbd_sad4x8_bits12, - aom_highbd_sad4x8_avg_bits12, aom_highbd_12_variance4x8, - aom_highbd_12_sub_pixel_variance4x8, - aom_highbd_12_sub_pixel_avg_variance4x8, NULL, - aom_highbd_sad4x8x8_bits12, aom_highbd_sad4x8x4d_bits12, - aom_highbd_jnt_sad4x8_avg_bits12, - 
aom_highbd_12_jnt_sub_pixel_avg_variance4x8); + HIGHBD_BFP( + BLOCK_4X8, aom_highbd_sad4x8_bits12, aom_highbd_sad4x8_avg_bits12, + aom_highbd_12_variance4x8, aom_highbd_12_sub_pixel_variance4x8, + aom_highbd_12_sub_pixel_avg_variance4x8, + aom_highbd_sad4x8x4d_bits12, aom_highbd_jnt_sad4x8_avg_bits12, + aom_highbd_12_jnt_sub_pixel_avg_variance4x8); HIGHBD_BFP( BLOCK_4X4, aom_highbd_sad4x4_bits12, aom_highbd_sad4x4_avg_bits12, aom_highbd_12_variance4x4, aom_highbd_12_sub_pixel_variance4x4, - aom_highbd_12_sub_pixel_avg_variance4x4, aom_highbd_sad4x4x3_bits12, - aom_highbd_sad4x4x8_bits12, aom_highbd_sad4x4x4d_bits12, - aom_highbd_jnt_sad4x4_avg_bits12, + aom_highbd_12_sub_pixel_avg_variance4x4, + aom_highbd_sad4x4x4d_bits12, aom_highbd_jnt_sad4x4_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance4x4); - HIGHBD_BFP( - BLOCK_128X128, aom_highbd_sad128x128_bits12, - aom_highbd_sad128x128_avg_bits12, aom_highbd_12_variance128x128, - aom_highbd_12_sub_pixel_variance128x128, - aom_highbd_12_sub_pixel_avg_variance128x128, - aom_highbd_sad128x128x3_bits12, aom_highbd_sad128x128x8_bits12, - aom_highbd_sad128x128x4d_bits12, - aom_highbd_jnt_sad128x128_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance128x128); + HIGHBD_BFP(BLOCK_128X128, aom_highbd_sad128x128_bits12, + aom_highbd_sad128x128_avg_bits12, + aom_highbd_12_variance128x128, + aom_highbd_12_sub_pixel_variance128x128, + aom_highbd_12_sub_pixel_avg_variance128x128, + aom_highbd_sad128x128x4d_bits12, + aom_highbd_jnt_sad128x128_avg_bits12, + aom_highbd_12_jnt_sub_pixel_avg_variance128x128); HIGHBD_BFP( BLOCK_128X64, aom_highbd_sad128x64_bits12, aom_highbd_sad128x64_avg_bits12, aom_highbd_12_variance128x64, aom_highbd_12_sub_pixel_variance128x64, - aom_highbd_12_sub_pixel_avg_variance128x64, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance128x64, aom_highbd_sad128x64x4d_bits12, aom_highbd_jnt_sad128x64_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance128x64); @@ -2121,7 +2029,7 @@ BLOCK_64X128, 
aom_highbd_sad64x128_bits12, aom_highbd_sad64x128_avg_bits12, aom_highbd_12_variance64x128, aom_highbd_12_sub_pixel_variance64x128, - aom_highbd_12_sub_pixel_avg_variance64x128, NULL, NULL, + aom_highbd_12_sub_pixel_avg_variance64x128, aom_highbd_sad64x128x4d_bits12, aom_highbd_jnt_sad64x128_avg_bits12, aom_highbd_12_jnt_sub_pixel_avg_variance64x128); @@ -2575,123 +2483,115 @@ av1_set_speed_features_framesize_independent(cpi); av1_set_speed_features_framesize_dependent(cpi); -#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF, JSDAF, JSVAF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx3f = SDX3F; \ - cpi->fn_ptr[BT].sdx8f = SDX8F; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; \ - cpi->fn_ptr[BT].jsdaf = JSDAF; \ +#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \ + cpi->fn_ptr[BT].sdf = SDF; \ + cpi->fn_ptr[BT].sdaf = SDAF; \ + cpi->fn_ptr[BT].vf = VF; \ + cpi->fn_ptr[BT].svf = SVF; \ + cpi->fn_ptr[BT].svaf = SVAF; \ + cpi->fn_ptr[BT].sdx4df = SDX4DF; \ + cpi->fn_ptr[BT].jsdaf = JSDAF; \ cpi->fn_ptr[BT].jsvaf = JSVAF; BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16, - aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, NULL, NULL, + aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, aom_sad4x16x4d, aom_jnt_sad4x16_avg, aom_jnt_sub_pixel_avg_variance4x16) BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4, - aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4, NULL, NULL, + aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4, aom_sad16x4x4d, aom_jnt_sad16x4_avg, aom_jnt_sub_pixel_avg_variance16x4) BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32, - aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32, NULL, NULL, + aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32, aom_sad8x32x4d, aom_jnt_sad8x32_avg, 
aom_jnt_sub_pixel_avg_variance8x32) BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8, - aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, NULL, NULL, + aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, aom_sad32x8x4d, aom_jnt_sad32x8_avg, aom_jnt_sub_pixel_avg_variance32x8) BFP(BLOCK_16X64, aom_sad16x64, aom_sad16x64_avg, aom_variance16x64, - aom_sub_pixel_variance16x64, aom_sub_pixel_avg_variance16x64, NULL, NULL, + aom_sub_pixel_variance16x64, aom_sub_pixel_avg_variance16x64, aom_sad16x64x4d, aom_jnt_sad16x64_avg, aom_jnt_sub_pixel_avg_variance16x64) BFP(BLOCK_64X16, aom_sad64x16, aom_sad64x16_avg, aom_variance64x16, - aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16, NULL, NULL, + aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16, aom_sad64x16x4d, aom_jnt_sad64x16_avg, aom_jnt_sub_pixel_avg_variance64x16) BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128, aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128, - aom_sad128x128x3, aom_sad128x128x8, aom_sad128x128x4d, - aom_jnt_sad128x128_avg, aom_jnt_sub_pixel_avg_variance128x128) + aom_sad128x128x4d, aom_jnt_sad128x128_avg, + aom_jnt_sub_pixel_avg_variance128x128) BFP(BLOCK_128X64, aom_sad128x64, aom_sad128x64_avg, aom_variance128x64, - aom_sub_pixel_variance128x64, aom_sub_pixel_avg_variance128x64, NULL, - NULL, aom_sad128x64x4d, aom_jnt_sad128x64_avg, + aom_sub_pixel_variance128x64, aom_sub_pixel_avg_variance128x64, + aom_sad128x64x4d, aom_jnt_sad128x64_avg, aom_jnt_sub_pixel_avg_variance128x64) BFP(BLOCK_64X128, aom_sad64x128, aom_sad64x128_avg, aom_variance64x128, - aom_sub_pixel_variance64x128, aom_sub_pixel_avg_variance64x128, NULL, - NULL, aom_sad64x128x4d, aom_jnt_sad64x128_avg, + aom_sub_pixel_variance64x128, aom_sub_pixel_avg_variance64x128, + aom_sad64x128x4d, aom_jnt_sad64x128_avg, aom_jnt_sub_pixel_avg_variance64x128) BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg, aom_variance32x16, - 
aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, NULL, NULL, + aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, aom_sad32x16x4d, aom_jnt_sad32x16_avg, aom_jnt_sub_pixel_avg_variance32x16) BFP(BLOCK_16X32, aom_sad16x32, aom_sad16x32_avg, aom_variance16x32, - aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32, NULL, NULL, + aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32, aom_sad16x32x4d, aom_jnt_sad16x32_avg, aom_jnt_sub_pixel_avg_variance16x32) BFP(BLOCK_64X32, aom_sad64x32, aom_sad64x32_avg, aom_variance64x32, - aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32, NULL, NULL, + aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32, aom_sad64x32x4d, aom_jnt_sad64x32_avg, aom_jnt_sub_pixel_avg_variance64x32) BFP(BLOCK_32X64, aom_sad32x64, aom_sad32x64_avg, aom_variance32x64, - aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64, NULL, NULL, + aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64, aom_sad32x64x4d, aom_jnt_sad32x64_avg, aom_jnt_sub_pixel_avg_variance32x64) BFP(BLOCK_32X32, aom_sad32x32, aom_sad32x32_avg, aom_variance32x32, aom_sub_pixel_variance32x32, aom_sub_pixel_avg_variance32x32, - aom_sad32x32x3, aom_sad32x32x8, aom_sad32x32x4d, aom_jnt_sad32x32_avg, + aom_sad32x32x4d, aom_jnt_sad32x32_avg, aom_jnt_sub_pixel_avg_variance32x32) BFP(BLOCK_64X64, aom_sad64x64, aom_sad64x64_avg, aom_variance64x64, aom_sub_pixel_variance64x64, aom_sub_pixel_avg_variance64x64, - aom_sad64x64x3, aom_sad64x64x8, aom_sad64x64x4d, aom_jnt_sad64x64_avg, + aom_sad64x64x4d, aom_jnt_sad64x64_avg, aom_jnt_sub_pixel_avg_variance64x64) BFP(BLOCK_16X16, aom_sad16x16, aom_sad16x16_avg, aom_variance16x16, aom_sub_pixel_variance16x16, aom_sub_pixel_avg_variance16x16, - aom_sad16x16x3, aom_sad16x16x8, aom_sad16x16x4d, aom_jnt_sad16x16_avg, + aom_sad16x16x4d, aom_jnt_sad16x16_avg, aom_jnt_sub_pixel_avg_variance16x16) BFP(BLOCK_16X8, aom_sad16x8, aom_sad16x8_avg, aom_variance16x8, - 
aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8, aom_sad16x8x3, - aom_sad16x8x8, aom_sad16x8x4d, aom_jnt_sad16x8_avg, - aom_jnt_sub_pixel_avg_variance16x8) + aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8, + aom_sad16x8x4d, aom_jnt_sad16x8_avg, aom_jnt_sub_pixel_avg_variance16x8) BFP(BLOCK_8X16, aom_sad8x16, aom_sad8x16_avg, aom_variance8x16, - aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16, aom_sad8x16x3, - aom_sad8x16x8, aom_sad8x16x4d, aom_jnt_sad8x16_avg, - aom_jnt_sub_pixel_avg_variance8x16) + aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16, + aom_sad8x16x4d, aom_jnt_sad8x16_avg, aom_jnt_sub_pixel_avg_variance8x16) BFP(BLOCK_8X8, aom_sad8x8, aom_sad8x8_avg, aom_variance8x8, - aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x3, - aom_sad8x8x8, aom_sad8x8x4d, aom_jnt_sad8x8_avg, - aom_jnt_sub_pixel_avg_variance8x8) + aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x4d, + aom_jnt_sad8x8_avg, aom_jnt_sub_pixel_avg_variance8x8) BFP(BLOCK_8X4, aom_sad8x4, aom_sad8x4_avg, aom_variance8x4, - aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, NULL, - aom_sad8x4x8, aom_sad8x4x4d, aom_jnt_sad8x4_avg, - aom_jnt_sub_pixel_avg_variance8x4) + aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, aom_sad8x4x4d, + aom_jnt_sad8x4_avg, aom_jnt_sub_pixel_avg_variance8x4) BFP(BLOCK_4X8, aom_sad4x8, aom_sad4x8_avg, aom_variance4x8, - aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, NULL, - aom_sad4x8x8, aom_sad4x8x4d, aom_jnt_sad4x8_avg, - aom_jnt_sub_pixel_avg_variance4x8) + aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, aom_sad4x8x4d, + aom_jnt_sad4x8_avg, aom_jnt_sub_pixel_avg_variance4x8) BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg, aom_variance4x4, - aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x3, - aom_sad4x4x8, aom_sad4x4x4d, aom_jnt_sad4x4_avg, - aom_jnt_sub_pixel_avg_variance4x4) + aom_sub_pixel_variance4x4, 
aom_sub_pixel_avg_variance4x4, aom_sad4x4x4d, + aom_jnt_sad4x4_avg, aom_jnt_sub_pixel_avg_variance4x4) #define OBFP(BT, OSDF, OVF, OSVF) \ cpi->fn_ptr[BT].osdf = OSDF; \
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index cc26168..3dc6491 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -481,7 +481,6 @@ ActiveMap active_map; fractional_mv_step_fp *find_fractional_mv_step; - av1_full_search_fn_t full_search_sad; // It is currently unused. av1_diamond_search_fn_t diamond_search_sad; aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL]; uint64_t time_receive_data;
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c index 566e96c..06bd793 100644 --- a/av1/encoder/mcomp.c +++ b/av1/encoder/mcomp.c
@@ -2125,197 +2125,6 @@ return bestsme; } -int av1_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, - int sad_per_bit, int distance, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, MV *best_mv) { - int r, c; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min); - const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max); - const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min); - const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max); - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - int best_sad = - fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), - in_what->stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); - *best_mv = *ref_mv; - - for (r = row_min; r < row_max; ++r) { - for (c = col_min; c < col_max; ++c) { - const MV mv = { r, c }; - const int sad = - fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), - in_what->stride) + - mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - } - return best_sad; -} - -int av1_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, - int sad_per_bit, int distance, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, MV *best_mv) { - int r; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min); - const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max); - const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min); - const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max); - const MV fcenter_mv = { 
center_mv->row >> 3, center_mv->col >> 3 }; - unsigned int best_sad = - fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), - in_what->stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); - *best_mv = *ref_mv; - - for (r = row_min; r < row_max; ++r) { - int c = col_min; - const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; - - if (fn_ptr->sdx3f != NULL) { - while ((c + 2) < col_max) { - int i; - DECLARE_ALIGNED(16, uint32_t, sads[3]); - - fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, - sads); - - for (i = 0; i < 3; ++i) { - unsigned int sad = sads[i]; - if (sad < best_sad) { - const MV mv = { r, c }; - sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - ++check_here; - ++c; - } - } - } - - while (c < col_max) { - unsigned int sad = - fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride); - if (sad < best_sad) { - const MV mv = { r, c }; - sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - ++check_here; - ++c; - } - } - - return best_sad; -} - -int av1_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, - int sad_per_bit, int distance, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, MV *best_mv) { - int r; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min); - const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max); - const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min); - const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max); - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - unsigned int best_sad = - fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, 
ref_mv), - in_what->stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); - *best_mv = *ref_mv; - - for (r = row_min; r < row_max; ++r) { - int c = col_min; - const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; - - if (fn_ptr->sdx8f != NULL) { - while ((c + 7) < col_max) { - int i; - DECLARE_ALIGNED(16, uint32_t, sads[8]); - - fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride, - sads); - - for (i = 0; i < 8; ++i) { - unsigned int sad = sads[i]; - if (sad < best_sad) { - const MV mv = { r, c }; - sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - ++check_here; - ++c; - } - } - } - - if (fn_ptr->sdx3f != NULL) { - while ((c + 2) < col_max) { - int i; - DECLARE_ALIGNED(16, uint32_t, sads[3]); - - fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, - sads); - - for (i = 0; i < 3; ++i) { - unsigned int sad = sads[i]; - if (sad < best_sad) { - const MV mv = { r, c }; - sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - ++check_here; - ++c; - } - } - } - - while (c < col_max) { - unsigned int sad = - fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride); - if (sad < best_sad) { - const MV mv = { r, c }; - sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - ++check_here; - ++c; - } - } - - return best_sad; -} - int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const aom_variance_fn_ptr_t *fn_ptr,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index c573ecb..4d8b807 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -495,7 +495,6 @@ sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6); } - cpi->full_search_sad = av1_full_search_sad; cpi->diamond_search_sad = av1_diamond_search_sad; sf->allow_exhaustive_searches = 1;