namespace ARCH_* defines

This prevents redefinition warnings if a toolchain defines one. This
matches the following change in libvpx:
fad865c54 namespace ARCH_* defines

Change-Id: I7cddf2a3a0cd39de9121049e48c949568ccf1a21
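Illustrative note (not part of the change): a minimal sketch of the kind of collision the prefix avoids. The file name and the pre-existing macro below are hypothetical; the point is that a generic name such as ARCH_X86_64 may already be defined by a toolchain or an embedding build system, while a namespaced AOM_ARCH_X86_64 cannot clash with it.

/* redefinition_sketch.c -- hypothetical example, not a libaom source file. */

/* Suppose a toolchain or an embedding project's config header already
 * provides a generic architecture macro: */
#define ARCH_X86_64 1

/* If the codec's generated config also used the generic spelling, a second
 *   #define ARCH_X86_64 1
 * with a differing definition would trigger a "macro redefined" warning.
 * The namespaced spelling cannot collide with it: */
#define AOM_ARCH_X86_64 1

#if AOM_ARCH_X86_64
int use_x86_64_code_path(void) { return 1; }
#else
int use_x86_64_code_path(void) { return 0; }
#endif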
diff --git a/aom/src/aom_encoder.c b/aom/src/aom_encoder.c
index f9fe2fe..a4acbcc 100644
--- a/aom/src/aom_encoder.c
+++ b/aom/src/aom_encoder.c
@@ -121,7 +121,7 @@ return res; } -#if ARCH_X86 || ARCH_X86_64 +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 /* On X86, disable the x87 unit's internal 80 bit precision for better * consistency with the SSE unit's 64 bit precision. */ @@ -132,7 +132,7 @@ #else #define FLOATING_POINT_SET_PRECISION #define FLOATING_POINT_RESTORE_PRECISION -#endif // ARCH_X86 || ARCH_X86_64 +#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64 #if HAVE_FEXCEPT && CONFIG_DEBUG #define FLOATING_POINT_SET_EXCEPTIONS \
diff --git a/aom_dsp/x86/aom_subpixel_8t_ssse3.asm b/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
index 3ca7921..e5fafb0 100644
--- a/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
+++ b/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
@@ -30,7 +30,7 @@ %define LOCAL_VARS_SIZE 16*6 %macro SETUP_LOCAL_VARS 0 - ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 + + ; TODO(slavarnway): using xmm registers for these on AOM_ARCH_X86_64 + ; pmaddubsw has a higher latency on some platforms, this might be eased by ; interleaving the instructions. %define k0k1 [rsp + 16*0] @@ -52,7 +52,7 @@ mova k2k3, m1 mova k4k5, m2 mova k6k7, m3 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %define krd m12 %define tmp0 [rsp + 16*4] %define tmp1 [rsp + 16*5] @@ -72,7 +72,7 @@ %endm ;------------------------------------------------------------------------------- -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %define LOCAL_VARS_SIZE_H4 0 %else %define LOCAL_VARS_SIZE_H4 16*4 @@ -83,7 +83,7 @@ src, sstride, dst, dstride, height, filter mova m4, [filterq] packsswb m4, m4 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %define k0k1k4k5 m8 %define k2k3k6k7 m9 %define krd m10 @@ -346,7 +346,7 @@ psraw m0, 7 psraw m4, 7 %ifidn %1, h8_add_src -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 pcmpeqb m2, m2 ;all ones psrlw m2, 8 ;even_byte_mask %else @@ -383,7 +383,7 @@ ; TODO(Linfeng): Detect cpu type and choose the code with better performance. %define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1 -%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON +%if AOM_ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON %define NUM_GENERAL_REG_USED 9 %else %define NUM_GENERAL_REG_USED 6 @@ -403,9 +403,9 @@ dec heightd -%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON +%if AOM_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %define src1q r7 %define sstride6q r8 %define dst_stride dstrideq @@ -528,7 +528,7 @@ movx [dstq], m0 %else - ; ARCH_X86_64 + ; AOM_ARCH_X86_64 movx m0, [srcq ] ;A movx m1, [srcq + sstrideq ] ;B @@ -628,7 +628,7 @@ %endif movx [dstq], m0 -%endif ; ARCH_X86_64 +%endif ; AOM_ARCH_X86_64 .done: REP_RET @@ -642,9 +642,9 @@ mova m4, [filterq] SETUP_LOCAL_VARS -%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON +%if AOM_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %define src1q r7 %define sstride6q r8 %define dst_stride dstrideq @@ -724,7 +724,7 @@ REP_RET %else - ; ARCH_X86_64 + ; AOM_ARCH_X86_64 dec heightd movu m1, [srcq ] ;A @@ -860,7 +860,7 @@ .done: REP_RET -%endif ; ARCH_X86_64 +%endif ; AOM_ARCH_X86_64 %endm
diff --git a/aom_dsp/x86/blk_sse_sum_avx2.c b/aom_dsp/x86/blk_sse_sum_avx2.c
index f7c0eb0..fdf7de3 100644
--- a/aom_dsp/x86/blk_sse_sum_avx2.c
+++ b/aom_dsp/x86/blk_sse_sum_avx2.c
@@ -31,7 +31,7 @@ out_buffer = _mm256_castsi256_si128(regx_sum); *x_sum += _mm_cvtsi128_si32(out_buffer); out_buffer = _mm256_castsi256_si128(regx2_sum); -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 *x2_sum += _mm_cvtsi128_si64(out_buffer); #else {
diff --git a/aom_dsp/x86/blk_sse_sum_sse2.c b/aom_dsp/x86/blk_sse_sum_sse2.c
index ef0a024..bf89427 100644
--- a/aom_dsp/x86/blk_sse_sum_sse2.c
+++ b/aom_dsp/x86/blk_sse_sum_sse2.c
@@ -41,7 +41,7 @@ temp_buffer2 = _mm_unpackhi_epi32(regx2_sum, _mm_setzero_si128()); regx2_sum = _mm_add_epi64(temp_buffer1, temp_buffer2); regx2_sum = _mm_add_epi64(regx2_sum, _mm_srli_si128(regx2_sum, 8)); -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 *x2_sum += _mm_cvtsi128_si64(regx2_sum); #else { @@ -82,7 +82,7 @@ temp_buffer2 = _mm_unpackhi_epi32(regx2_sum, _mm_setzero_si128()); regx2_sum = _mm_add_epi64(temp_buffer1, temp_buffer2); regx2_sum = _mm_add_epi64(regx2_sum, _mm_srli_si128(regx2_sum, 8)); -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 *x2_sum += _mm_cvtsi128_si64(regx2_sum); #else {
diff --git a/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm b/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
index c1fb259..0687904 100644
--- a/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
+++ b/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
@@ -45,7 +45,7 @@ SECTION .text -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 INIT_XMM ssse3 cglobal fdct8x8, 3, 5, 13, input, output, stride
diff --git a/aom_dsp/x86/highbd_sad4d_sse2.asm b/aom_dsp/x86/highbd_sad4d_sse2.asm
index 9442cd0..03839b4 100644
--- a/aom_dsp/x86/highbd_sad4d_sse2.asm
+++ b/aom_dsp/x86/highbd_sad4d_sse2.asm
@@ -221,21 +221,21 @@ ; 3: If 0, then normal sad, if 2, then skip every other row %macro HIGH_SADNXN4D 2-3 0 %if %3 == 0 ; normal sad -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ res, ref2, ref3, ref4 %else cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ ref2, ref3, ref4 -%endif ; ARCH_X86_64 +%endif ; AOM_ARCH_X86_64 %else ; %3 == 2, downsample -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 cglobal highbd_sad_skip_%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ res, ref2, ref3, ref4 %else cglobal highbd_sad_skip_%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ ref2, ref3, ref4 -%endif ; ARCH_X86_64 +%endif ; AOM_ARCH_X86_64 %endif ; sad/avg/skip ; set m1
diff --git a/aom_dsp/x86/highbd_sad_sse2.asm b/aom_dsp/x86/highbd_sad_sse2.asm
index 48b93bf..3dc4e4e 100644
--- a/aom_dsp/x86/highbd_sad_sse2.asm
+++ b/aom_dsp/x86/highbd_sad_sse2.asm
@@ -34,11 +34,11 @@ cglobal highbd_sad%1x%2_avg, 5, 1 + %3, %5, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 -cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, %5, src, src_stride, \ +cglobal highbd_sad%1x%2_avg, 5, AOM_ARCH_X86_64 + %3, %5, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %define n_rowsd r7d %else ; x86-32 %define n_rowsd dword r0m
diff --git a/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
index 5c78933..c0ccc18 100644
--- a/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -81,7 +81,7 @@ %endmacro %macro INC_SRC_BY_SRC_STRIDE 0 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 add srcq, src_stridemp add srcq, src_stridemp %else @@ -94,7 +94,7 @@ %define filter_idx_shift 5 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %if %2 == 1 ; avg cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \ x_offset, y_offset, \ @@ -271,11 +271,11 @@ .x_zero_y_nonhalf: ; x_offset == 0 && y_offset == bilin interpolation -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if AOM_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+y_offsetq] mova m9, [bilin_filter+y_offsetq+16] mova m10, [GLOBAL(pw_8)] @@ -283,7 +283,7 @@ %define filter_y_b m9 %define filter_rnd m10 %else ; x86-32 or mmx -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0, reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm @@ -498,11 +498,11 @@ .x_half_y_nonhalf: ; x_offset == 0.5 && y_offset == bilin interpolation -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if AOM_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+y_offsetq] mova m9, [bilin_filter+y_offsetq+16] mova m10, [GLOBAL(pw_8)] @@ -510,7 +510,7 @@ %define filter_y_b m9 %define filter_rnd m10 %else ; x86_32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0.5. We can reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm @@ -620,11 +620,11 @@ jnz .x_nonhalf_y_nonzero ; x_offset == bilin interpolation && y_offset == 0 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if AOM_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] mova m10, [GLOBAL(pw_8)] @@ -632,7 +632,7 @@ %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0. We can reuse y_offset reg. %define tempq y_offsetq add x_offsetq, g_bilin_filterm @@ -719,11 +719,11 @@ jne .x_nonhalf_y_nonhalf ; x_offset == bilin interpolation && y_offset == 0.5 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if AOM_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] mova m10, [GLOBAL(pw_8)] @@ -731,7 +731,7 @@ %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0.5. We can reuse y_offset reg. %define tempq y_offsetq add x_offsetq, g_bilin_filterm @@ -846,12 +846,12 @@ .x_nonhalf_y_nonhalf: ; loading filter - this is same as in 8-bit depth -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5 shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && mmsize == 16 +%if AOM_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] mova m10, [bilin_filter+y_offsetq] @@ -863,7 +863,7 @@ %define filter_y_b m11 %define filter_rnd m12 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; In this case, there is NO unused register. 
Used src_stride register. Later, ; src_stride has to be loaded from stack when it is needed. %define tempq src_strideq
diff --git a/aom_dsp/x86/obmc_intrinsic_ssse3.h b/aom_dsp/x86/obmc_intrinsic_ssse3.h
index 48486c6..27398ff 100644
--- a/aom_dsp/x86/obmc_intrinsic_ssse3.h
+++ b/aom_dsp/x86/obmc_intrinsic_ssse3.h
@@ -24,7 +24,7 @@ static INLINE int64_t xx_hsum_epi64_si64(__m128i v_q) { v_q = _mm_add_epi64(v_q, _mm_srli_si128(v_q, 8)); -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 return _mm_cvtsi128_si64(v_q); #else {
diff --git a/aom_dsp/x86/sad4d_sse2.asm b/aom_dsp/x86/sad4d_sse2.asm
index 6696c40..6edad99 100644
--- a/aom_dsp/x86/sad4d_sse2.asm
+++ b/aom_dsp/x86/sad4d_sse2.asm
@@ -200,7 +200,7 @@ %define use_loop (use_ref_offset || %2 > 4) %if %3 == 1 ; skip rows -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %if use_ref_offset cglobal sad_skip_%1x%2x4d, 5, 10, 8, src, src_stride, ref1, ref_stride, res, \ ref2, ref3, ref4, cnt, ref_offset @@ -227,7 +227,7 @@ %endif %endif %else ; normal sad -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %if use_ref_offset cglobal sad%1x%2x4d, 5, 10, 8, src, src_stride, ref1, ref_stride, res, ref2, \ ref3, ref4, cnt, ref_offset
diff --git a/aom_dsp/x86/sad_sse2.asm b/aom_dsp/x86/sad_sse2.asm
index de9845a..dbe8ca3 100644
--- a/aom_dsp/x86/sad_sse2.asm
+++ b/aom_dsp/x86/sad_sse2.asm
@@ -42,11 +42,11 @@ cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 -cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \ +cglobal sad%1x%2_avg, 5, AOM_ARCH_X86_64 + %3, 6, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %define n_rowsd r7d %else ; x86-32 %define n_rowsd dword r0m
diff --git a/aom_dsp/x86/subpel_variance_sse2.asm b/aom_dsp/x86/subpel_variance_sse2.asm
index cbf2890..d1d8373 100644
--- a/aom_dsp/x86/subpel_variance_sse2.asm
+++ b/aom_dsp/x86/subpel_variance_sse2.asm
@@ -98,7 +98,7 @@ %endmacro %macro INC_SRC_BY_SRC_STRIDE 0 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 add srcq, src_stridemp %else add srcq, src_strideq @@ -117,7 +117,7 @@ ; 11, not 13, if the registers are ordered correctly. May make a minor speed ; difference on Win64 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %if %2 == 1 ; avg cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \ x_offset, y_offset, dst, dst_stride, \ @@ -355,11 +355,11 @@ .x_zero_y_nonhalf: ; x_offset == 0 && y_offset == bilin interpolation -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if AOM_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+y_offsetq+16] @@ -369,7 +369,7 @@ %define filter_y_b m9 %define filter_rnd m10 %else ; x86-32 or mmx -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0, reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm @@ -678,11 +678,11 @@ .x_half_y_nonhalf: ; x_offset == 0.5 && y_offset == bilin interpolation -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if AOM_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+y_offsetq+16] @@ -692,7 +692,7 @@ %define filter_y_b m9 %define filter_rnd m10 %else ;x86_32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0.5. We can reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm @@ -836,11 +836,11 @@ jnz .x_nonhalf_y_nonzero ; x_offset == bilin interpolation && y_offset == 0 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if AOM_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] @@ -850,7 +850,7 @@ %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ;y_offset == 0. We can reuse y_offset reg. %define tempq y_offsetq add x_offsetq, g_bilin_filterm @@ -978,11 +978,11 @@ jne .x_nonhalf_y_nonhalf ; x_offset == bilin interpolation && y_offset == 0.5 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if AOM_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] @@ -992,7 +992,7 @@ %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0.5. We can reuse y_offset reg. 
%define tempq y_offsetq add x_offsetq, g_bilin_filterm @@ -1176,12 +1176,12 @@ STORE_AND_RET %1 .x_nonhalf_y_nonhalf: -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift shl y_offsetd, filter_idx_shift -%if ARCH_X86_64 && %1 > 4 +%if AOM_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] @@ -1197,7 +1197,7 @@ %define filter_y_b m11 %define filter_rnd m12 %else ; x86-32 -%if ARCH_X86=1 && CONFIG_PIC=1 +%if AOM_ARCH_X86=1 && CONFIG_PIC=1 ; In this case, there is NO unused register. Used src_stride register. Later, ; src_stride has to be loaded from stack when it is needed. %define tempq src_strideq
diff --git a/aom_dsp/x86/sum_squares_sse2.c b/aom_dsp/x86/sum_squares_sse2.c
index 25be856..cf3ed98 100644
--- a/aom_dsp/x86/sum_squares_sse2.c
+++ b/aom_dsp/x86/sum_squares_sse2.c
@@ -23,7 +23,7 @@ } static INLINE uint64_t xx_cvtsi128_si64(__m128i a) { -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 return (uint64_t)_mm_cvtsi128_si64(a); #else {
diff --git a/aom_ports/x86.h b/aom_ports/x86.h
index d44d386..c089984 100644
--- a/aom_ports/x86.h
+++ b/aom_ports/x86.h
@@ -44,7 +44,7 @@ } aom_cpu_t; #if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 #define cpuid(func, func2, ax, bx, cx, dx) \ __asm__ __volatile__("cpuid \n\t" \ : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \ @@ -60,7 +60,7 @@ #endif #elif defined(__SUNPRO_C) || \ defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 #define cpuid(func, func2, ax, bx, cx, dx) \ asm volatile( \ "xchg %rsi, %rbx \n\t" \ @@ -80,7 +80,7 @@ : "a"(func), "c"(func2)) #endif #else /* end __SUNPRO__ */ -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 #if defined(_MSC_VER) && _MSC_VER > 1500 #define cpuid(func, func2, a, b, c, d) \ do { \ @@ -258,7 +258,7 @@ asm volatile("rdtsc\n\t" : "=a"(tsc) :); return tsc; #else -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 return (unsigned int)__rdtsc(); #else __asm rdtsc; @@ -276,7 +276,7 @@ asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); return ((uint64_t)hi << 32) | lo; #else -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 return (uint64_t)__rdtsc(); #else __asm rdtsc; @@ -298,7 +298,7 @@ unsigned int ui; return (unsigned int)__rdtscp(&ui); #else -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 return (unsigned int)__rdtscp(); #else __asm rdtscp; @@ -336,7 +336,7 @@ #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) #define x86_pause_hint() asm volatile("pause \n\t") #else -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 #define x86_pause_hint() _mm_pause(); #else #define x86_pause_hint() __asm pause @@ -361,7 +361,7 @@ asm volatile("fstcw %0\n\t" : "=m"(*&mode) :); return mode; } -#elif ARCH_X86_64 +#elif AOM_ARCH_X86_64 /* No fldcw intrinsics on Windows x64, punt to external asm */ extern void aom_winx64_fldcw(unsigned short mode); extern unsigned short aom_winx64_fstcw(void);
diff --git a/av1/encoder/x86/error_intrin_sse2.c b/av1/encoder/x86/error_intrin_sse2.c
index e876db1..61f65c6 100644
--- a/av1/encoder/x86/error_intrin_sse2.c
+++ b/av1/encoder/x86/error_intrin_sse2.c
@@ -65,11 +65,11 @@ accum = reduce_sum_epi64(accum); // Store the results. -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 return _mm_cvtsi128_si64(accum); #else int64_t result; _mm_storel_epi64((__m128i *)&result, accum); return result; -#endif // ARCH_X86_64 +#endif // AOM_ARCH_X86_64 }
diff --git a/av1/encoder/x86/error_sse2.asm b/av1/encoder/x86/error_sse2.asm
index f4b4968..6407c10 100644
--- a/av1/encoder/x86/error_sse2.asm
+++ b/av1/encoder/x86/error_sse2.asm
@@ -75,7 +75,7 @@ movhlps m7, m6 paddq m4, m5 paddq m6, m7 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 movq rax, m4 movq [sszq], m6 %else
diff --git a/av1/encoder/x86/wedge_utils_avx2.c b/av1/encoder/x86/wedge_utils_avx2.c
index bbc62d5..9cde860 100644
--- a/av1/encoder/x86/wedge_utils_avx2.c
+++ b/av1/encoder/x86/wedge_utils_avx2.c
@@ -72,7 +72,7 @@ __m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc0_q); __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc0_q, 1); v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1); -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 csse = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0); #else xx_storel_64(&csse, v_acc_q_0); @@ -141,7 +141,7 @@ __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc_q, 1); v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1); -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 acc = _mm_extract_epi64(v_acc_q_0, 0); #else xx_storel_64(&acc, v_acc_q_0);
diff --git a/av1/encoder/x86/wedge_utils_sse2.c b/av1/encoder/x86/wedge_utils_sse2.c
index e665b2e..d7ac222 100644
--- a/av1/encoder/x86/wedge_utils_sse2.c
+++ b/av1/encoder/x86/wedge_utils_sse2.c
@@ -85,7 +85,7 @@ v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_srli_si128(v_acc0_q, 8)); -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 csse = (uint64_t)_mm_cvtsi128_si64(v_acc0_q); #else xx_storel_64(&csse, v_acc0_q); @@ -174,7 +174,7 @@ v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8)); -#if ARCH_X86_64 +#if AOM_ARCH_X86_64 acc = _mm_cvtsi128_si64(v_acc_q); #else xx_storel_64(&acc, v_acc_q);
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 726415c..d688e62 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -23,10 +23,10 @@ set_aom_detect_var(INLINE "" "Sets INLINE value for current target.") # CPUs. -set_aom_detect_var(ARCH_ARM 0 "Enables ARM architecture.") -set_aom_detect_var(ARCH_PPC 0 "Enables PPC architecture.") -set_aom_detect_var(ARCH_X86 0 "Enables X86 architecture.") -set_aom_detect_var(ARCH_X86_64 0 "Enables X86_64 architecture.") +set_aom_detect_var(AOM_ARCH_ARM 0 "Enables ARM architecture.") +set_aom_detect_var(AOM_ARCH_PPC 0 "Enables PPC architecture.") +set_aom_detect_var(AOM_ARCH_X86 0 "Enables X86 architecture.") +set_aom_detect_var(AOM_ARCH_X86_64 0 "Enables X86_64 architecture.") # ARM feature flags. set_aom_detect_var(HAVE_NEON 0 "Enables NEON intrinsics optimizations.")
diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
index 99ac38a..730ad42 100644
--- a/build/cmake/cpu.cmake
+++ b/build/cmake/cpu.cmake
@@ -10,7 +10,7 @@ # if("${AOM_TARGET_CPU}" MATCHES "^arm") - set(ARCH_ARM 1) + set(AOM_ARCH_ARM 1) set(RTCD_ARCH_ARM "yes") if(ENABLE_NEON) @@ -34,7 +34,7 @@ endif() elseif("${AOM_TARGET_CPU}" MATCHES "ppc") - set(ARCH_PPC 1) + set(AOM_ARCH_PPC 1) set(RTCD_ARCH_PPC "yes") if(ENABLE_VSX) @@ -46,10 +46,10 @@ endif() elseif("${AOM_TARGET_CPU}" MATCHES "^x86") if("${AOM_TARGET_CPU}" STREQUAL "x86") - set(ARCH_X86 1) + set(AOM_ARCH_X86 1) set(RTCD_ARCH_X86 "yes") elseif("${AOM_TARGET_CPU}" STREQUAL "x86_64") - set(ARCH_X86_64 1) + set(AOM_ARCH_X86_64 1) set(RTCD_ARCH_X86_64 "yes") endif()
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index d5232ee..2ce3a20 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -727,14 +727,14 @@ aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x, \ filter_x_stride, filter_y, filter_y_stride, w, h, bd); \ } -#if HAVE_SSE2 && ARCH_X86_64 +#if HAVE_SSE2 && AOM_ARCH_X86_64 WRAP(convolve8_horiz_sse2, 8) WRAP(convolve8_vert_sse2, 8) WRAP(convolve8_horiz_sse2, 10) WRAP(convolve8_vert_sse2, 10) WRAP(convolve8_horiz_sse2, 12) WRAP(convolve8_vert_sse2, 12) -#endif // HAVE_SSE2 && ARCH_X86_64 +#endif // HAVE_SSE2 && AOM_ARCH_X86_64 WRAP(convolve8_horiz_c, 8) WRAP(convolve8_vert_c, 8) @@ -776,7 +776,7 @@ INSTANTIATE_TEST_SUITE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c)); -#if HAVE_SSE2 && ARCH_X86_64 +#if HAVE_SSE2 && AOM_ARCH_X86_64 #if CONFIG_AV1_HIGHBITDEPTH const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8, wrap_convolve8_vert_sse2_8, 8);
diff --git a/test/fft_test.cc b/test/fft_test.cc
index 04d047d..5443c99 100644
--- a/test/fft_test.cc
+++ b/test/fft_test.cc
@@ -147,7 +147,7 @@ FFTTestArg(16, aom_fft16x16_float_c), FFTTestArg(32, aom_fft32x32_float_c))); -#if ARCH_X86 || ARCH_X86_64 +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 #if HAVE_SSE2 INSTANTIATE_TEST_SUITE_P( SSE2, FFT2DTest, @@ -163,7 +163,7 @@ FFTTestArg(16, aom_fft16x16_float_avx2), FFTTestArg(32, aom_fft32x32_float_avx2))); #endif // HAVE_AVX2 -#endif // ARCH_X86 || ARCH_X86_64 +#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64 struct IFFTTestArg { int n; @@ -246,7 +246,7 @@ IFFTTestArg(8, aom_ifft8x8_float_c), IFFTTestArg(16, aom_ifft16x16_float_c), IFFTTestArg(32, aom_ifft32x32_float_c))); -#if ARCH_X86 || ARCH_X86_64 +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 #if HAVE_SSE2 INSTANTIATE_TEST_SUITE_P( SSE2, IFFT2DTest, @@ -263,6 +263,6 @@ IFFTTestArg(16, aom_ifft16x16_float_avx2), IFFTTestArg(32, aom_ifft32x32_float_avx2))); #endif // HAVE_AVX2 -#endif // ARCH_X86 || ARCH_X86_64 +#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64 } // namespace
diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 6f58898..04e8306 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc
@@ -768,7 +768,7 @@ ::testing::ValuesIn(kQParamArrayNEON)); #endif -#if HAVE_SSSE3 && ARCH_X86_64 +#if HAVE_SSSE3 && AOM_ARCH_X86_64 INSTANTIATE_TEST_SUITE_P( SSSE3, FullPrecisionQuantizeTest, ::testing::Values( @@ -779,7 +779,7 @@ make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_ssse3, static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8))); -#endif // HAVE_SSSE3 && ARCH_X86_64 +#endif // HAVE_SSSE3 && AOM_ARCH_X86_64 #if HAVE_AVX INSTANTIATE_TEST_SUITE_P(
diff --git a/test/register_state_check.h b/test/register_state_check.h
index 0a150c3..4aad814 100644
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -26,7 +26,7 @@ // See platform implementations of RegisterStateCheck and // RegisterStateCheckMMX for details. -#if defined(_WIN64) && ARCH_X86_64 +#if defined(_WIN64) && AOM_ARCH_X86_64 #undef NOMINMAX #define NOMINMAX @@ -86,9 +86,9 @@ class RegisterStateCheck {}; } // namespace libaom_test -#endif // _WIN64 && ARCH_X86_64 +#endif // _WIN64 && AOM_ARCH_X86_64 -#if (ARCH_X86 || ARCH_X86_64) && defined(__GNUC__) +#if (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__) namespace libaom_test { // Checks the FPU tag word pre/post execution to ensure emms has been called. @@ -122,7 +122,7 @@ class RegisterStateCheckMMX {}; } // namespace libaom_test -#endif // (ARCH_X86 || ARCH_X86_64) && defined(__GNUC__) +#endif // (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__) #define API_REGISTER_STATE_CHECK(statement) \ do { \
diff --git a/test/test_libaom.cc b/test/test_libaom.cc
index 462d877..6ffbbc5 100644
--- a/test/test_libaom.cc
+++ b/test/test_libaom.cc
@@ -17,7 +17,7 @@ #include "config/aom_config.h" -#if ARCH_X86 || ARCH_X86_64 +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 #include "aom_ports/x86.h" #endif extern "C" { @@ -26,7 +26,7 @@ extern void aom_scale_rtcd(); } -#if ARCH_X86 || ARCH_X86_64 +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 static void append_negative_gtest_filter(const char *str) { std::string flag_value = GTEST_FLAG_GET(filter); // Negative patterns begin with one '-' followed by a ':' separated list. @@ -44,12 +44,12 @@ } GTEST_FLAG_SET(filter, flag_value); } -#endif // ARCH_X86 || ARCH_X86_64 +#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64 int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); -#if ARCH_X86 || ARCH_X86_64 +#if AOM_ARCH_X86 || AOM_ARCH_X86_64 const int simd_caps = x86_simd_caps(); if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter("MMX"); if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter("SSE"); @@ -60,7 +60,7 @@ if (!(simd_caps & HAS_SSE4_2)) append_negative_gtest_filter("SSE4_2"); if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter("AVX"); if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter("AVX2"); -#endif // ARCH_X86 || ARCH_X86_64 +#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64 // Shared library builds don't support whitebox tests that exercise internal // symbols.
diff --git a/third_party/x86inc/README.libaom b/third_party/x86inc/README.libaom
index 2f3e5c2..6b92358 100644
--- a/third_party/x86inc/README.libaom
+++ b/third_party/x86inc/README.libaom
@@ -16,3 +16,4 @@ Use .text instead of .rodata on macho to avoid broken tables in PIC mode. Use .text with no alignment for aout. Only use 'hidden' visibility with Chromium. +Prefix ARCH_* with AOM_.
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index e48d644..b0421f5 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -45,7 +45,7 @@ %endif %ifndef STACK_ALIGNMENT - %if ARCH_X86_64 + %if AOM_ARCH_X86_64 %define STACK_ALIGNMENT 16 %else %define STACK_ALIGNMENT 4 @@ -54,7 +54,7 @@ %define WIN64 0 %define UNIX64 0 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %ifidn __OUTPUT_FORMAT__,win32 %define WIN64 1 %elifidn __OUTPUT_FORMAT__,win64 @@ -168,7 +168,7 @@ %endif %endif - %if ARCH_X86_64 == 0 + %if AOM_ARCH_X86_64 == 0 %undef PIC %endif @@ -277,7 +277,7 @@ %if %0 == 2 %define r%1m %2d %define r%1mp %2 - %elif ARCH_X86_64 ; memory + %elif AOM_ARCH_X86_64 ; memory %define r%1m [rstk + stack_offset + %3] %define r%1mp qword r %+ %1 %+ m %else @@ -298,7 +298,7 @@ %define e%1h %3 %define r%1b %2 %define e%1b %2 - %if ARCH_X86_64 == 0 + %if AOM_ARCH_X86_64 == 0 %define r%1 e%1 %endif %endmacro @@ -335,14 +335,14 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 %define gprsize 8 %else %define gprsize 4 %endif %macro LEA 2 -%if ARCH_X86_64 +%if AOM_ARCH_X86_64 lea %1, [%2] %elif PIC call $+5 ; special-cased to not affect the RSB on most CPU:s @@ -414,7 +414,7 @@ %endif %endmacro -%if ARCH_X86_64 == 0 +%if AOM_ARCH_X86_64 == 0 %define movsxd movifnidn %endif @@ -466,7 +466,7 @@ %endmacro %define required_stack_alignment ((mmsize + 15) & ~15) -%define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512))) +%define vzeroupper_required (mmsize > 16 && (AOM_ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512))) %define high_mm_regs (16*cpuflag(avx512)) %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only) @@ -521,13 +521,13 @@ ; Reserve an additional register for storing the original stack pointer, but avoid using ; eax/rax for this purpose since it can potentially get overwritten as a return value. %assign regs_used (regs_used + 1) - %if ARCH_X86_64 && regs_used == 7 + %if AOM_ARCH_X86_64 && regs_used == 7 %assign regs_used 8 - %elif ARCH_X86_64 == 0 && regs_used == 1 + %elif AOM_ARCH_X86_64 == 0 && regs_used == 1 %assign regs_used 2 %endif %endif - %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 + %if AOM_ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 ; Ensure that we don't clobber any registers containing arguments. For UNIX64 we also preserve r6 (rax) ; since it's used as a hidden argument in vararg functions to specify the number of vector registers used. %assign regs_used 5 + UNIX64 * 3 @@ -654,7 +654,7 @@ AUTO_REP_RET %endmacro -%elif ARCH_X86_64 ; *nix x64 ;============================================= +%elif AOM_ARCH_X86_64 ; *nix x64 ;============================================= DECLARE_REG 0, rdi DECLARE_REG 1, rsi @@ -1002,7 +1002,7 @@ %endif %endif - %if ARCH_X86_64 || cpuflag(sse2) + %if AOM_ARCH_X86_64 || cpuflag(sse2) %ifdef __NASM_VER__ ALIGNMODE p6 %else @@ -1039,7 +1039,7 @@ %endif %assign num_mmregs 8 - %if ARCH_X86_64 && mmsize >= 16 + %if AOM_ARCH_X86_64 && mmsize >= 16 %assign num_mmregs 16 %if cpuflag(avx512) || mmsize == 64 %assign num_mmregs 32 @@ -1064,7 +1064,7 @@ ; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper %macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg - %if ARCH_X86_64 && cpuflag(avx512) + %if AOM_ARCH_X86_64 && cpuflag(avx512) %assign %%i %1 %rep 16-%1 %assign %%i_high %%i+16