namespace ARCH_* defines
This prevents redefinition warnings if a toolchain defines one of these macros itself.
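For illustration, a minimal sketch of the clash being avoided; the command-line
define and the 0/1 values below are hypothetical, not taken from the build system:

  /* Suppose the toolchain predefines the bare macro, e.g. the compiler
   * is invoked with -DARCH_X86_64 (empty) or -DARCH_X86_64=1.          */
  #define ARCH_X86_64              /* stand-in for the toolchain define */

  /* Previously, the generated aom_config.h defined the same name with a
   * different body, which triggers a macro-redefinition warning:       */
  #define ARCH_X86_64 0            /* warning: "ARCH_X86_64" redefined  */

  /* With a namespaced macro, the config header no longer collides with
   * anything the toolchain provides:                                   */
  #define AOM_ARCH_X86_64 0

  int main(void) { return AOM_ARCH_X86_64; }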
This matches the following change in libvpx:
fad865c54 namespace ARCH_* defines
Change-Id: I7cddf2a3a0cd39de9121049e48c949568ccf1a21
diff --git a/aom/src/aom_encoder.c b/aom/src/aom_encoder.c
index f9fe2fe..a4acbcc 100644
--- a/aom/src/aom_encoder.c
+++ b/aom/src/aom_encoder.c
@@ -121,7 +121,7 @@
return res;
}
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
/* On X86, disable the x87 unit's internal 80 bit precision for better
* consistency with the SSE unit's 64 bit precision.
*/
@@ -132,7 +132,7 @@
#else
#define FLOATING_POINT_SET_PRECISION
#define FLOATING_POINT_RESTORE_PRECISION
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64
#if HAVE_FEXCEPT && CONFIG_DEBUG
#define FLOATING_POINT_SET_EXCEPTIONS \
diff --git a/aom_dsp/x86/aom_subpixel_8t_ssse3.asm b/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
index 3ca7921..e5fafb0 100644
--- a/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
+++ b/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
@@ -30,7 +30,7 @@
%define LOCAL_VARS_SIZE 16*6
%macro SETUP_LOCAL_VARS 0
- ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 +
+ ; TODO(slavarnway): using xmm registers for these on AOM_ARCH_X86_64 +
; pmaddubsw has a higher latency on some platforms, this might be eased by
; interleaving the instructions.
%define k0k1 [rsp + 16*0]
@@ -52,7 +52,7 @@
mova k2k3, m1
mova k4k5, m2
mova k6k7, m3
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%define krd m12
%define tmp0 [rsp + 16*4]
%define tmp1 [rsp + 16*5]
@@ -72,7 +72,7 @@
%endm
;-------------------------------------------------------------------------------
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%define LOCAL_VARS_SIZE_H4 0
%else
%define LOCAL_VARS_SIZE_H4 16*4
@@ -83,7 +83,7 @@
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
packsswb m4, m4
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%define k0k1k4k5 m8
%define k2k3k6k7 m9
%define krd m10
@@ -346,7 +346,7 @@
psraw m0, 7
psraw m4, 7
%ifidn %1, h8_add_src
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
pcmpeqb m2, m2 ;all ones
psrlw m2, 8 ;even_byte_mask
%else
@@ -383,7 +383,7 @@
; TODO(Linfeng): Detect cpu type and choose the code with better performance.
%define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1
-%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if AOM_ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
%define NUM_GENERAL_REG_USED 9
%else
%define NUM_GENERAL_REG_USED 6
@@ -403,9 +403,9 @@
dec heightd
-%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if AOM_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%define src1q r7
%define sstride6q r8
%define dst_stride dstrideq
@@ -528,7 +528,7 @@
movx [dstq], m0
%else
- ; ARCH_X86_64
+ ; AOM_ARCH_X86_64
movx m0, [srcq ] ;A
movx m1, [srcq + sstrideq ] ;B
@@ -628,7 +628,7 @@
%endif
movx [dstq], m0
-%endif ; ARCH_X86_64
+%endif ; AOM_ARCH_X86_64
.done:
REP_RET
@@ -642,9 +642,9 @@
mova m4, [filterq]
SETUP_LOCAL_VARS
-%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if AOM_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%define src1q r7
%define sstride6q r8
%define dst_stride dstrideq
@@ -724,7 +724,7 @@
REP_RET
%else
- ; ARCH_X86_64
+ ; AOM_ARCH_X86_64
dec heightd
movu m1, [srcq ] ;A
@@ -860,7 +860,7 @@
.done:
REP_RET
-%endif ; ARCH_X86_64
+%endif ; AOM_ARCH_X86_64
%endm
diff --git a/aom_dsp/x86/blk_sse_sum_avx2.c b/aom_dsp/x86/blk_sse_sum_avx2.c
index f7c0eb0..fdf7de3 100644
--- a/aom_dsp/x86/blk_sse_sum_avx2.c
+++ b/aom_dsp/x86/blk_sse_sum_avx2.c
@@ -31,7 +31,7 @@
out_buffer = _mm256_castsi256_si128(regx_sum);
*x_sum += _mm_cvtsi128_si32(out_buffer);
out_buffer = _mm256_castsi256_si128(regx2_sum);
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
*x2_sum += _mm_cvtsi128_si64(out_buffer);
#else
{
diff --git a/aom_dsp/x86/blk_sse_sum_sse2.c b/aom_dsp/x86/blk_sse_sum_sse2.c
index ef0a024..bf89427 100644
--- a/aom_dsp/x86/blk_sse_sum_sse2.c
+++ b/aom_dsp/x86/blk_sse_sum_sse2.c
@@ -41,7 +41,7 @@
temp_buffer2 = _mm_unpackhi_epi32(regx2_sum, _mm_setzero_si128());
regx2_sum = _mm_add_epi64(temp_buffer1, temp_buffer2);
regx2_sum = _mm_add_epi64(regx2_sum, _mm_srli_si128(regx2_sum, 8));
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
*x2_sum += _mm_cvtsi128_si64(regx2_sum);
#else
{
@@ -82,7 +82,7 @@
temp_buffer2 = _mm_unpackhi_epi32(regx2_sum, _mm_setzero_si128());
regx2_sum = _mm_add_epi64(temp_buffer1, temp_buffer2);
regx2_sum = _mm_add_epi64(regx2_sum, _mm_srli_si128(regx2_sum, 8));
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
*x2_sum += _mm_cvtsi128_si64(regx2_sum);
#else
{
diff --git a/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm b/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
index c1fb259..0687904 100644
--- a/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
+++ b/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
@@ -45,7 +45,7 @@
SECTION .text
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
INIT_XMM ssse3
cglobal fdct8x8, 3, 5, 13, input, output, stride
diff --git a/aom_dsp/x86/highbd_sad4d_sse2.asm b/aom_dsp/x86/highbd_sad4d_sse2.asm
index 9442cd0..03839b4 100644
--- a/aom_dsp/x86/highbd_sad4d_sse2.asm
+++ b/aom_dsp/x86/highbd_sad4d_sse2.asm
@@ -221,21 +221,21 @@
; 3: If 0, then normal sad, if 2, then skip every other row
%macro HIGH_SADNXN4D 2-3 0
%if %3 == 0 ; normal sad
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
res, ref2, ref3, ref4
%else
cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
ref2, ref3, ref4
-%endif ; ARCH_X86_64
+%endif ; AOM_ARCH_X86_64
%else ; %3 == 2, downsample
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
cglobal highbd_sad_skip_%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
res, ref2, ref3, ref4
%else
cglobal highbd_sad_skip_%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
ref2, ref3, ref4
-%endif ; ARCH_X86_64
+%endif ; AOM_ARCH_X86_64
%endif ; sad/avg/skip
; set m1
diff --git a/aom_dsp/x86/highbd_sad_sse2.asm b/aom_dsp/x86/highbd_sad_sse2.asm
index 48b93bf..3dc4e4e 100644
--- a/aom_dsp/x86/highbd_sad_sse2.asm
+++ b/aom_dsp/x86/highbd_sad_sse2.asm
@@ -34,11 +34,11 @@
cglobal highbd_sad%1x%2_avg, 5, 1 + %3, %5, src, src_stride, ref, ref_stride, \
second_pred, n_rows
%else ; %3 == 7
-cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, %5, src, src_stride, \
+cglobal highbd_sad%1x%2_avg, 5, AOM_ARCH_X86_64 + %3, %5, src, src_stride, \
ref, ref_stride, \
second_pred, \
src_stride3, ref_stride3
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%define n_rowsd r7d
%else ; x86-32
%define n_rowsd dword r0m
diff --git a/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
index 5c78933..c0ccc18 100644
--- a/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -81,7 +81,7 @@
%endmacro
%macro INC_SRC_BY_SRC_STRIDE 0
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
add srcq, src_stridemp
add srcq, src_stridemp
%else
@@ -94,7 +94,7 @@
%define filter_idx_shift 5
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
x_offset, y_offset, \
@@ -271,11 +271,11 @@
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+y_offsetq]
mova m9, [bilin_filter+y_offsetq+16]
mova m10, [GLOBAL(pw_8)]
@@ -283,7 +283,7 @@
%define filter_y_b m9
%define filter_rnd m10
%else ; x86-32 or mmx
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; x_offset == 0, reuse x_offset reg
%define tempq x_offsetq
add y_offsetq, g_bilin_filterm
@@ -498,11 +498,11 @@
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+y_offsetq]
mova m9, [bilin_filter+y_offsetq+16]
mova m10, [GLOBAL(pw_8)]
@@ -510,7 +510,7 @@
%define filter_y_b m9
%define filter_rnd m10
%else ; x86_32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; x_offset == 0.5. We can reuse x_offset reg
%define tempq x_offsetq
add y_offsetq, g_bilin_filterm
@@ -620,11 +620,11 @@
jnz .x_nonhalf_y_nonzero
; x_offset == bilin interpolation && y_offset == 0
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
mova m10, [GLOBAL(pw_8)]
@@ -632,7 +632,7 @@
%define filter_x_b m9
%define filter_rnd m10
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; y_offset == 0. We can reuse y_offset reg.
%define tempq y_offsetq
add x_offsetq, g_bilin_filterm
@@ -719,11 +719,11 @@
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
mova m10, [GLOBAL(pw_8)]
@@ -731,7 +731,7 @@
%define filter_x_b m9
%define filter_rnd m10
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; y_offset == 0.5. We can reuse y_offset reg.
%define tempq y_offsetq
add x_offsetq, g_bilin_filterm
@@ -846,12 +846,12 @@
.x_nonhalf_y_nonhalf:
; loading filter - this is same as in 8-bit depth
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
mova m10, [bilin_filter+y_offsetq]
@@ -863,7 +863,7 @@
%define filter_y_b m11
%define filter_rnd m12
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; In this case, there is NO unused register. Used src_stride register. Later,
; src_stride has to be loaded from stack when it is needed.
%define tempq src_strideq
diff --git a/aom_dsp/x86/obmc_intrinsic_ssse3.h b/aom_dsp/x86/obmc_intrinsic_ssse3.h
index 48486c6..27398ff 100644
--- a/aom_dsp/x86/obmc_intrinsic_ssse3.h
+++ b/aom_dsp/x86/obmc_intrinsic_ssse3.h
@@ -24,7 +24,7 @@
static INLINE int64_t xx_hsum_epi64_si64(__m128i v_q) {
v_q = _mm_add_epi64(v_q, _mm_srli_si128(v_q, 8));
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
return _mm_cvtsi128_si64(v_q);
#else
{
diff --git a/aom_dsp/x86/sad4d_sse2.asm b/aom_dsp/x86/sad4d_sse2.asm
index 6696c40..6edad99 100644
--- a/aom_dsp/x86/sad4d_sse2.asm
+++ b/aom_dsp/x86/sad4d_sse2.asm
@@ -200,7 +200,7 @@
%define use_loop (use_ref_offset || %2 > 4)
%if %3 == 1 ; skip rows
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%if use_ref_offset
cglobal sad_skip_%1x%2x4d, 5, 10, 8, src, src_stride, ref1, ref_stride, res, \
ref2, ref3, ref4, cnt, ref_offset
@@ -227,7 +227,7 @@
%endif
%endif
%else ; normal sad
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%if use_ref_offset
cglobal sad%1x%2x4d, 5, 10, 8, src, src_stride, ref1, ref_stride, res, ref2, \
ref3, ref4, cnt, ref_offset
diff --git a/aom_dsp/x86/sad_sse2.asm b/aom_dsp/x86/sad_sse2.asm
index de9845a..dbe8ca3 100644
--- a/aom_dsp/x86/sad_sse2.asm
+++ b/aom_dsp/x86/sad_sse2.asm
@@ -42,11 +42,11 @@
cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \
second_pred, n_rows
%else ; %3 == 7
-cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \
+cglobal sad%1x%2_avg, 5, AOM_ARCH_X86_64 + %3, 6, src, src_stride, \
ref, ref_stride, \
second_pred, \
src_stride3, ref_stride3
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%define n_rowsd r7d
%else ; x86-32
%define n_rowsd dword r0m
diff --git a/aom_dsp/x86/subpel_variance_sse2.asm b/aom_dsp/x86/subpel_variance_sse2.asm
index cbf2890..d1d8373 100644
--- a/aom_dsp/x86/subpel_variance_sse2.asm
+++ b/aom_dsp/x86/subpel_variance_sse2.asm
@@ -98,7 +98,7 @@
%endmacro
%macro INC_SRC_BY_SRC_STRIDE 0
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
add srcq, src_stridemp
%else
add srcq, src_strideq
@@ -117,7 +117,7 @@
; 11, not 13, if the registers are ordered correctly. May make a minor speed
; difference on Win64
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
x_offset, y_offset, dst, dst_stride, \
@@ -355,11 +355,11 @@
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+y_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
@@ -369,7 +369,7 @@
%define filter_y_b m9
%define filter_rnd m10
%else ; x86-32 or mmx
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; x_offset == 0, reuse x_offset reg
%define tempq x_offsetq
add y_offsetq, g_bilin_filterm
@@ -678,11 +678,11 @@
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+y_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
@@ -692,7 +692,7 @@
%define filter_y_b m9
%define filter_rnd m10
%else ;x86_32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; x_offset == 0.5. We can reuse x_offset reg
%define tempq x_offsetq
add y_offsetq, g_bilin_filterm
@@ -836,11 +836,11 @@
jnz .x_nonhalf_y_nonzero
; x_offset == bilin interpolation && y_offset == 0
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -850,7 +850,7 @@
%define filter_x_b m9
%define filter_rnd m10
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
;y_offset == 0. We can reuse y_offset reg.
%define tempq y_offsetq
add x_offsetq, g_bilin_filterm
@@ -978,11 +978,11 @@
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -992,7 +992,7 @@
%define filter_x_b m9
%define filter_rnd m10
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; y_offset == 0.5. We can reuse y_offset reg.
%define tempq y_offsetq
add x_offsetq, g_bilin_filterm
@@ -1176,12 +1176,12 @@
STORE_AND_RET %1
.x_nonhalf_y_nonhalf:
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -1197,7 +1197,7 @@
%define filter_y_b m11
%define filter_rnd m12
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
; In this case, there is NO unused register. Used src_stride register. Later,
; src_stride has to be loaded from stack when it is needed.
%define tempq src_strideq
diff --git a/aom_dsp/x86/sum_squares_sse2.c b/aom_dsp/x86/sum_squares_sse2.c
index 25be856..cf3ed98 100644
--- a/aom_dsp/x86/sum_squares_sse2.c
+++ b/aom_dsp/x86/sum_squares_sse2.c
@@ -23,7 +23,7 @@
}
static INLINE uint64_t xx_cvtsi128_si64(__m128i a) {
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
return (uint64_t)_mm_cvtsi128_si64(a);
#else
{
diff --git a/aom_ports/x86.h b/aom_ports/x86.h
index d44d386..c089984 100644
--- a/aom_ports/x86.h
+++ b/aom_ports/x86.h
@@ -44,7 +44,7 @@
} aom_cpu_t;
#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
#define cpuid(func, func2, ax, bx, cx, dx) \
__asm__ __volatile__("cpuid \n\t" \
: "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
@@ -60,7 +60,7 @@
#endif
#elif defined(__SUNPRO_C) || \
defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
#define cpuid(func, func2, ax, bx, cx, dx) \
asm volatile( \
"xchg %rsi, %rbx \n\t" \
@@ -80,7 +80,7 @@
: "a"(func), "c"(func2))
#endif
#else /* end __SUNPRO__ */
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
#define cpuid(func, func2, a, b, c, d) \
do { \
@@ -258,7 +258,7 @@
asm volatile("rdtsc\n\t" : "=a"(tsc) :);
return tsc;
#else
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
return (unsigned int)__rdtsc();
#else
__asm rdtsc;
@@ -276,7 +276,7 @@
asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
return ((uint64_t)hi << 32) | lo;
#else
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
return (uint64_t)__rdtsc();
#else
__asm rdtsc;
@@ -298,7 +298,7 @@
unsigned int ui;
return (unsigned int)__rdtscp(&ui);
#else
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
return (unsigned int)__rdtscp();
#else
__asm rdtscp;
@@ -336,7 +336,7 @@
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint() asm volatile("pause \n\t")
#else
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
#define x86_pause_hint() _mm_pause();
#else
#define x86_pause_hint() __asm pause
@@ -361,7 +361,7 @@
asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
return mode;
}
-#elif ARCH_X86_64
+#elif AOM_ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
extern void aom_winx64_fldcw(unsigned short mode);
extern unsigned short aom_winx64_fstcw(void);
diff --git a/av1/encoder/x86/error_intrin_sse2.c b/av1/encoder/x86/error_intrin_sse2.c
index e876db1..61f65c6 100644
--- a/av1/encoder/x86/error_intrin_sse2.c
+++ b/av1/encoder/x86/error_intrin_sse2.c
@@ -65,11 +65,11 @@
accum = reduce_sum_epi64(accum);
// Store the results.
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
return _mm_cvtsi128_si64(accum);
#else
int64_t result;
_mm_storel_epi64((__m128i *)&result, accum);
return result;
-#endif // ARCH_X86_64
+#endif // AOM_ARCH_X86_64
}
diff --git a/av1/encoder/x86/error_sse2.asm b/av1/encoder/x86/error_sse2.asm
index f4b4968..6407c10 100644
--- a/av1/encoder/x86/error_sse2.asm
+++ b/av1/encoder/x86/error_sse2.asm
@@ -75,7 +75,7 @@
movhlps m7, m6
paddq m4, m5
paddq m6, m7
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
movq rax, m4
movq [sszq], m6
%else
diff --git a/av1/encoder/x86/wedge_utils_avx2.c b/av1/encoder/x86/wedge_utils_avx2.c
index bbc62d5..9cde860 100644
--- a/av1/encoder/x86/wedge_utils_avx2.c
+++ b/av1/encoder/x86/wedge_utils_avx2.c
@@ -72,7 +72,7 @@
__m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc0_q);
__m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc0_q, 1);
v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
csse = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0);
#else
xx_storel_64(&csse, v_acc_q_0);
@@ -141,7 +141,7 @@
__m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc_q, 1);
v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
acc = _mm_extract_epi64(v_acc_q_0, 0);
#else
xx_storel_64(&acc, v_acc_q_0);
diff --git a/av1/encoder/x86/wedge_utils_sse2.c b/av1/encoder/x86/wedge_utils_sse2.c
index e665b2e..d7ac222 100644
--- a/av1/encoder/x86/wedge_utils_sse2.c
+++ b/av1/encoder/x86/wedge_utils_sse2.c
@@ -85,7 +85,7 @@
v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_srli_si128(v_acc0_q, 8));
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
csse = (uint64_t)_mm_cvtsi128_si64(v_acc0_q);
#else
xx_storel_64(&csse, v_acc0_q);
@@ -174,7 +174,7 @@
v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
acc = _mm_cvtsi128_si64(v_acc_q);
#else
xx_storel_64(&acc, v_acc_q);
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 726415c..d688e62 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -23,10 +23,10 @@
set_aom_detect_var(INLINE "" "Sets INLINE value for current target.")
# CPUs.
-set_aom_detect_var(ARCH_ARM 0 "Enables ARM architecture.")
-set_aom_detect_var(ARCH_PPC 0 "Enables PPC architecture.")
-set_aom_detect_var(ARCH_X86 0 "Enables X86 architecture.")
-set_aom_detect_var(ARCH_X86_64 0 "Enables X86_64 architecture.")
+set_aom_detect_var(AOM_ARCH_ARM 0 "Enables ARM architecture.")
+set_aom_detect_var(AOM_ARCH_PPC 0 "Enables PPC architecture.")
+set_aom_detect_var(AOM_ARCH_X86 0 "Enables X86 architecture.")
+set_aom_detect_var(AOM_ARCH_X86_64 0 "Enables X86_64 architecture.")
# ARM feature flags.
set_aom_detect_var(HAVE_NEON 0 "Enables NEON intrinsics optimizations.")
diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
index 99ac38a..730ad42 100644
--- a/build/cmake/cpu.cmake
+++ b/build/cmake/cpu.cmake
@@ -10,7 +10,7 @@
#
if("${AOM_TARGET_CPU}" MATCHES "^arm")
- set(ARCH_ARM 1)
+ set(AOM_ARCH_ARM 1)
set(RTCD_ARCH_ARM "yes")
if(ENABLE_NEON)
@@ -34,7 +34,7 @@
endif()
elseif("${AOM_TARGET_CPU}" MATCHES "ppc")
- set(ARCH_PPC 1)
+ set(AOM_ARCH_PPC 1)
set(RTCD_ARCH_PPC "yes")
if(ENABLE_VSX)
@@ -46,10 +46,10 @@
endif()
elseif("${AOM_TARGET_CPU}" MATCHES "^x86")
if("${AOM_TARGET_CPU}" STREQUAL "x86")
- set(ARCH_X86 1)
+ set(AOM_ARCH_X86 1)
set(RTCD_ARCH_X86 "yes")
elseif("${AOM_TARGET_CPU}" STREQUAL "x86_64")
- set(ARCH_X86_64 1)
+ set(AOM_ARCH_X86_64 1)
set(RTCD_ARCH_X86_64 "yes")
endif()
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index d5232ee..2ce3a20 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -727,14 +727,14 @@
aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x, \
filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
}
-#if HAVE_SSE2 && ARCH_X86_64
+#if HAVE_SSE2 && AOM_ARCH_X86_64
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
-#endif // HAVE_SSE2 && ARCH_X86_64
+#endif // HAVE_SSE2 && AOM_ARCH_X86_64
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
@@ -776,7 +776,7 @@
INSTANTIATE_TEST_SUITE_P(C, ConvolveTest,
::testing::ValuesIn(kArrayConvolve_c));
-#if HAVE_SSE2 && ARCH_X86_64
+#if HAVE_SSE2 && AOM_ARCH_X86_64
#if CONFIG_AV1_HIGHBITDEPTH
const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8,
wrap_convolve8_vert_sse2_8, 8);
diff --git a/test/fft_test.cc b/test/fft_test.cc
index 04d047d..5443c99 100644
--- a/test/fft_test.cc
+++ b/test/fft_test.cc
@@ -147,7 +147,7 @@
FFTTestArg(16, aom_fft16x16_float_c),
FFTTestArg(32,
aom_fft32x32_float_c)));
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
SSE2, FFT2DTest,
@@ -163,7 +163,7 @@
FFTTestArg(16, aom_fft16x16_float_avx2),
FFTTestArg(32, aom_fft32x32_float_avx2)));
#endif // HAVE_AVX2
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64
struct IFFTTestArg {
int n;
@@ -246,7 +246,7 @@
IFFTTestArg(8, aom_ifft8x8_float_c),
IFFTTestArg(16, aom_ifft16x16_float_c),
IFFTTestArg(32, aom_ifft32x32_float_c)));
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
#if HAVE_SSE2
INSTANTIATE_TEST_SUITE_P(
SSE2, IFFT2DTest,
@@ -263,6 +263,6 @@
IFFTTestArg(16, aom_ifft16x16_float_avx2),
IFFTTestArg(32, aom_ifft32x32_float_avx2)));
#endif // HAVE_AVX2
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64
} // namespace
diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 6f58898..04e8306 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc
@@ -768,7 +768,7 @@
::testing::ValuesIn(kQParamArrayNEON));
#endif
-#if HAVE_SSSE3 && ARCH_X86_64
+#if HAVE_SSSE3 && AOM_ARCH_X86_64
INSTANTIATE_TEST_SUITE_P(
SSSE3, FullPrecisionQuantizeTest,
::testing::Values(
@@ -779,7 +779,7 @@
make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_ssse3,
static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)));
-#endif // HAVE_SSSE3 && ARCH_X86_64
+#endif // HAVE_SSSE3 && AOM_ARCH_X86_64
#if HAVE_AVX
INSTANTIATE_TEST_SUITE_P(
diff --git a/test/register_state_check.h b/test/register_state_check.h
index 0a150c3..4aad814 100644
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -26,7 +26,7 @@
// See platform implementations of RegisterStateCheck and
// RegisterStateCheckMMX for details.
-#if defined(_WIN64) && ARCH_X86_64
+#if defined(_WIN64) && AOM_ARCH_X86_64
#undef NOMINMAX
#define NOMINMAX
@@ -86,9 +86,9 @@
class RegisterStateCheck {};
} // namespace libaom_test
-#endif // _WIN64 && ARCH_X86_64
+#endif // _WIN64 && AOM_ARCH_X86_64
-#if (ARCH_X86 || ARCH_X86_64) && defined(__GNUC__)
+#if (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__)
namespace libaom_test {
// Checks the FPU tag word pre/post execution to ensure emms has been called.
@@ -122,7 +122,7 @@
class RegisterStateCheckMMX {};
} // namespace libaom_test
-#endif // (ARCH_X86 || ARCH_X86_64) && defined(__GNUC__)
+#endif // (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__)
#define API_REGISTER_STATE_CHECK(statement) \
do { \
diff --git a/test/test_libaom.cc b/test/test_libaom.cc
index 462d877..6ffbbc5 100644
--- a/test/test_libaom.cc
+++ b/test/test_libaom.cc
@@ -17,7 +17,7 @@
#include "config/aom_config.h"
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
#include "aom_ports/x86.h"
#endif
extern "C" {
@@ -26,7 +26,7 @@
extern void aom_scale_rtcd();
}
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
static void append_negative_gtest_filter(const char *str) {
std::string flag_value = GTEST_FLAG_GET(filter);
// Negative patterns begin with one '-' followed by a ':' separated list.
@@ -44,12 +44,12 @@
}
GTEST_FLAG_SET(filter, flag_value);
}
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
const int simd_caps = x86_simd_caps();
if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter("MMX");
if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter("SSE");
@@ -60,7 +60,7 @@
if (!(simd_caps & HAS_SSE4_2)) append_negative_gtest_filter("SSE4_2");
if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter("AVX");
if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter("AVX2");
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // AOM_ARCH_X86 || AOM_ARCH_X86_64
// Shared library builds don't support whitebox tests that exercise internal
// symbols.
diff --git a/third_party/x86inc/README.libaom b/third_party/x86inc/README.libaom
index 2f3e5c2..6b92358 100644
--- a/third_party/x86inc/README.libaom
+++ b/third_party/x86inc/README.libaom
@@ -16,3 +16,4 @@
Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
Use .text with no alignment for aout.
Only use 'hidden' visibility with Chromium.
+Prefix ARCH_* with AOM_.
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index e48d644..b0421f5 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -45,7 +45,7 @@
%endif
%ifndef STACK_ALIGNMENT
- %if ARCH_X86_64
+ %if AOM_ARCH_X86_64
%define STACK_ALIGNMENT 16
%else
%define STACK_ALIGNMENT 4
@@ -54,7 +54,7 @@
%define WIN64 0
%define UNIX64 0
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%ifidn __OUTPUT_FORMAT__,win32
%define WIN64 1
%elifidn __OUTPUT_FORMAT__,win64
@@ -168,7 +168,7 @@
%endif
%endif
- %if ARCH_X86_64 == 0
+ %if AOM_ARCH_X86_64 == 0
%undef PIC
%endif
@@ -277,7 +277,7 @@
%if %0 == 2
%define r%1m %2d
%define r%1mp %2
- %elif ARCH_X86_64 ; memory
+ %elif AOM_ARCH_X86_64 ; memory
%define r%1m [rstk + stack_offset + %3]
%define r%1mp qword r %+ %1 %+ m
%else
@@ -298,7 +298,7 @@
%define e%1h %3
%define r%1b %2
%define e%1b %2
- %if ARCH_X86_64 == 0
+ %if AOM_ARCH_X86_64 == 0
%define r%1 e%1
%endif
%endmacro
@@ -335,14 +335,14 @@
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
%define gprsize 8
%else
%define gprsize 4
%endif
%macro LEA 2
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
lea %1, [%2]
%elif PIC
call $+5 ; special-cased to not affect the RSB on most CPU:s
@@ -414,7 +414,7 @@
%endif
%endmacro
-%if ARCH_X86_64 == 0
+%if AOM_ARCH_X86_64 == 0
%define movsxd movifnidn
%endif
@@ -466,7 +466,7 @@
%endmacro
%define required_stack_alignment ((mmsize + 15) & ~15)
-%define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
+%define vzeroupper_required (mmsize > 16 && (AOM_ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
%define high_mm_regs (16*cpuflag(avx512))
%macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
@@ -521,13 +521,13 @@
; Reserve an additional register for storing the original stack pointer, but avoid using
; eax/rax for this purpose since it can potentially get overwritten as a return value.
%assign regs_used (regs_used + 1)
- %if ARCH_X86_64 && regs_used == 7
+ %if AOM_ARCH_X86_64 && regs_used == 7
%assign regs_used 8
- %elif ARCH_X86_64 == 0 && regs_used == 1
+ %elif AOM_ARCH_X86_64 == 0 && regs_used == 1
%assign regs_used 2
%endif
%endif
- %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
+ %if AOM_ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
; Ensure that we don't clobber any registers containing arguments. For UNIX64 we also preserve r6 (rax)
; since it's used as a hidden argument in vararg functions to specify the number of vector registers used.
%assign regs_used 5 + UNIX64 * 3
@@ -654,7 +654,7 @@
AUTO_REP_RET
%endmacro
-%elif ARCH_X86_64 ; *nix x64 ;=============================================
+%elif AOM_ARCH_X86_64 ; *nix x64 ;=============================================
DECLARE_REG 0, rdi
DECLARE_REG 1, rsi
@@ -1002,7 +1002,7 @@
%endif
%endif
- %if ARCH_X86_64 || cpuflag(sse2)
+ %if AOM_ARCH_X86_64 || cpuflag(sse2)
%ifdef __NASM_VER__
ALIGNMODE p6
%else
@@ -1039,7 +1039,7 @@
%endif
%assign num_mmregs 8
- %if ARCH_X86_64 && mmsize >= 16
+ %if AOM_ARCH_X86_64 && mmsize >= 16
%assign num_mmregs 16
%if cpuflag(avx512) || mmsize == 64
%assign num_mmregs 32
@@ -1064,7 +1064,7 @@
; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
%macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
- %if ARCH_X86_64 && cpuflag(avx512)
+ %if AOM_ARCH_X86_64 && cpuflag(avx512)
%assign %%i %1
%rep 16-%1
%assign %%i_high %%i+16