namespace ARCH_* defines

This prevents redefinition warnings if a toolchain defines one of the ARCH_* macros.
This matches the following change in libvpx:
fad865c54 namespace ARCH_* defines
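
For illustration only, a minimal sketch of the clash being avoided, assuming a
hypothetical toolchain header that predefines ARCH_X86_64 with a value that
differs from the one in the generated aom_config.h (the exact warning text
varies by compiler):

    /* hypothetical toolchain/SDK header */
    #define ARCH_X86_64 1

    /* generated aom_config.h, old scheme: conflicting redefinition,
     * reported e.g. by clang's -Wmacro-redefined */
    #define ARCH_X86_64 0

    /* generated aom_config.h, new scheme: distinct name, no clash */
    #define AOM_ARCH_X86_64 0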

Change-Id: I7cddf2a3a0cd39de9121049e48c949568ccf1a21
diff --git a/aom/src/aom_encoder.c b/aom/src/aom_encoder.c
index f9fe2fe..a4acbcc 100644
--- a/aom/src/aom_encoder.c
+++ b/aom/src/aom_encoder.c
@@ -121,7 +121,7 @@
   return res;
 }
 
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
 /* On X86, disable the x87 unit's internal 80 bit precision for better
  * consistency with the SSE unit's 64 bit precision.
  */
@@ -132,7 +132,7 @@
 #else
 #define FLOATING_POINT_SET_PRECISION
 #define FLOATING_POINT_RESTORE_PRECISION
-#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // AOM_ARCH_X86 || AOM_ARCH_X86_64
 
 #if HAVE_FEXCEPT && CONFIG_DEBUG
 #define FLOATING_POINT_SET_EXCEPTIONS \
diff --git a/aom_dsp/x86/aom_subpixel_8t_ssse3.asm b/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
index 3ca7921..e5fafb0 100644
--- a/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
+++ b/aom_dsp/x86/aom_subpixel_8t_ssse3.asm
@@ -30,7 +30,7 @@
 %define LOCAL_VARS_SIZE 16*6
 
 %macro SETUP_LOCAL_VARS 0
-    ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 +
+    ; TODO(slavarnway): using xmm registers for these on AOM_ARCH_X86_64 +
     ; pmaddubsw has a higher latency on some platforms, this might be eased by
     ; interleaving the instructions.
     %define    k0k1  [rsp + 16*0]
@@ -52,7 +52,7 @@
     mova       k2k3, m1
     mova       k4k5, m2
     mova       k6k7, m3
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
     %define     krd  m12
     %define    tmp0  [rsp + 16*4]
     %define    tmp1  [rsp + 16*5]
@@ -72,7 +72,7 @@
 %endm
 
 ;-------------------------------------------------------------------------------
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   %define LOCAL_VARS_SIZE_H4 0
 %else
   %define LOCAL_VARS_SIZE_H4 16*4
@@ -83,7 +83,7 @@
                             src, sstride, dst, dstride, height, filter
     mova                m4, [filterq]
     packsswb            m4, m4
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
     %define       k0k1k4k5  m8
     %define       k2k3k6k7  m9
     %define            krd  m10
@@ -346,7 +346,7 @@
     psraw         m0, 7
     psraw         m4, 7
 %ifidn %1, h8_add_src
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
     pcmpeqb       m2, m2                  ;all ones
     psrlw         m2, 8                   ;even_byte_mask
 %else
@@ -383,7 +383,7 @@
 ; TODO(Linfeng): Detect cpu type and choose the code with better performance.
 %define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1
 
-%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if AOM_ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
     %define NUM_GENERAL_REG_USED 9
 %else
     %define NUM_GENERAL_REG_USED 6
@@ -403,9 +403,9 @@
 
     dec                 heightd
 
-%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if AOM_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
 
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
     %define               src1q  r7
     %define           sstride6q  r8
     %define          dst_stride  dstrideq
@@ -528,7 +528,7 @@
     movx                 [dstq], m0
 
 %else
-    ; ARCH_X86_64
+    ; AOM_ARCH_X86_64
 
     movx                     m0, [srcq                ]     ;A
     movx                     m1, [srcq + sstrideq     ]     ;B
@@ -628,7 +628,7 @@
 %endif
     movx                 [dstq], m0
 
-%endif ; ARCH_X86_64
+%endif ; AOM_ARCH_X86_64
 
 .done:
     REP_RET
@@ -642,9 +642,9 @@
     mova                     m4, [filterq]
     SETUP_LOCAL_VARS
 
-%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if AOM_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
 
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
     %define               src1q  r7
     %define           sstride6q  r8
     %define          dst_stride  dstrideq
@@ -724,7 +724,7 @@
     REP_RET
 
 %else
-    ; ARCH_X86_64
+    ; AOM_ARCH_X86_64
     dec                 heightd
 
     movu                     m1, [srcq                ]     ;A
@@ -860,7 +860,7 @@
 .done:
     REP_RET
 
-%endif ; ARCH_X86_64
+%endif ; AOM_ARCH_X86_64
 
 %endm
 
diff --git a/aom_dsp/x86/blk_sse_sum_avx2.c b/aom_dsp/x86/blk_sse_sum_avx2.c
index f7c0eb0..fdf7de3 100644
--- a/aom_dsp/x86/blk_sse_sum_avx2.c
+++ b/aom_dsp/x86/blk_sse_sum_avx2.c
@@ -31,7 +31,7 @@
   out_buffer = _mm256_castsi256_si128(regx_sum);
   *x_sum += _mm_cvtsi128_si32(out_buffer);
   out_buffer = _mm256_castsi256_si128(regx2_sum);
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   *x2_sum += _mm_cvtsi128_si64(out_buffer);
 #else
   {
diff --git a/aom_dsp/x86/blk_sse_sum_sse2.c b/aom_dsp/x86/blk_sse_sum_sse2.c
index ef0a024..bf89427 100644
--- a/aom_dsp/x86/blk_sse_sum_sse2.c
+++ b/aom_dsp/x86/blk_sse_sum_sse2.c
@@ -41,7 +41,7 @@
   temp_buffer2 = _mm_unpackhi_epi32(regx2_sum, _mm_setzero_si128());
   regx2_sum = _mm_add_epi64(temp_buffer1, temp_buffer2);
   regx2_sum = _mm_add_epi64(regx2_sum, _mm_srli_si128(regx2_sum, 8));
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   *x2_sum += _mm_cvtsi128_si64(regx2_sum);
 #else
   {
@@ -82,7 +82,7 @@
   temp_buffer2 = _mm_unpackhi_epi32(regx2_sum, _mm_setzero_si128());
   regx2_sum = _mm_add_epi64(temp_buffer1, temp_buffer2);
   regx2_sum = _mm_add_epi64(regx2_sum, _mm_srli_si128(regx2_sum, 8));
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   *x2_sum += _mm_cvtsi128_si64(regx2_sum);
 #else
   {
diff --git a/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm b/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
index c1fb259..0687904 100644
--- a/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
+++ b/aom_dsp/x86/fwd_txfm_ssse3_x86_64.asm
@@ -45,7 +45,7 @@
 
 SECTION .text
 
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
 INIT_XMM ssse3
 cglobal fdct8x8, 3, 5, 13, input, output, stride
 
diff --git a/aom_dsp/x86/highbd_sad4d_sse2.asm b/aom_dsp/x86/highbd_sad4d_sse2.asm
index 9442cd0..03839b4 100644
--- a/aom_dsp/x86/highbd_sad4d_sse2.asm
+++ b/aom_dsp/x86/highbd_sad4d_sse2.asm
@@ -221,21 +221,21 @@
 ;   3: If 0, then normal sad, if 2, then skip every other row
 %macro HIGH_SADNXN4D 2-3 0
 %if %3 == 0  ; normal sad
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
 cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
                               res, ref2, ref3, ref4
 %else
 cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
                               ref2, ref3, ref4
-%endif  ; ARCH_X86_64
+%endif  ; AOM_ARCH_X86_64
 %else  ; %3 == 2, downsample
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
 cglobal highbd_sad_skip_%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
                               res, ref2, ref3, ref4
 %else
 cglobal highbd_sad_skip_%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
                               ref2, ref3, ref4
-%endif  ; ARCH_X86_64
+%endif  ; AOM_ARCH_X86_64
 %endif  ; sad/avg/skip
 
 ; set m1
diff --git a/aom_dsp/x86/highbd_sad_sse2.asm b/aom_dsp/x86/highbd_sad_sse2.asm
index 48b93bf..3dc4e4e 100644
--- a/aom_dsp/x86/highbd_sad_sse2.asm
+++ b/aom_dsp/x86/highbd_sad_sse2.asm
@@ -34,11 +34,11 @@
 cglobal highbd_sad%1x%2_avg, 5, 1 + %3, %5, src, src_stride, ref, ref_stride, \
                                     second_pred, n_rows
 %else ; %3 == 7
-cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, %5, src, src_stride, \
+cglobal highbd_sad%1x%2_avg, 5, AOM_ARCH_X86_64 + %3, %5, src, src_stride, \
                                               ref, ref_stride, \
                                               second_pred, \
                                               src_stride3, ref_stride3
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
 %define n_rowsd r7d
 %else ; x86-32
 %define n_rowsd dword r0m
diff --git a/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
index 5c78933..c0ccc18 100644
--- a/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/aom_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -81,7 +81,7 @@
 %endmacro
 
 %macro INC_SRC_BY_SRC_STRIDE  0
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
   add                srcq, src_stridemp
   add                srcq, src_stridemp
 %else
@@ -94,7 +94,7 @@
 %define filter_idx_shift 5
 
 
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   %if %2 == 1 ; avg
     cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
                                       x_offset, y_offset, \
@@ -271,11 +271,11 @@
 
 .x_zero_y_nonhalf:
   ; x_offset == 0 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
   mova                 m8, [bilin_filter+y_offsetq]
   mova                 m9, [bilin_filter+y_offsetq+16]
   mova                m10, [GLOBAL(pw_8)]
@@ -283,7 +283,7 @@
 %define filter_y_b m9
 %define filter_rnd m10
 %else ; x86-32 or mmx
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; x_offset == 0, reuse x_offset reg
 %define tempq x_offsetq
   add y_offsetq, g_bilin_filterm
@@ -498,11 +498,11 @@
 
 .x_half_y_nonhalf:
   ; x_offset == 0.5 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
   mova                 m8, [bilin_filter+y_offsetq]
   mova                 m9, [bilin_filter+y_offsetq+16]
   mova                m10, [GLOBAL(pw_8)]
@@ -510,7 +510,7 @@
 %define filter_y_b m9
 %define filter_rnd m10
 %else  ; x86_32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; x_offset == 0.5. We can reuse x_offset reg
 %define tempq x_offsetq
   add y_offsetq, g_bilin_filterm
@@ -620,11 +620,11 @@
   jnz .x_nonhalf_y_nonzero
 
   ; x_offset == bilin interpolation && y_offset == 0
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
   mova                 m8, [bilin_filter+x_offsetq]
   mova                 m9, [bilin_filter+x_offsetq+16]
   mova                m10, [GLOBAL(pw_8)]
@@ -632,7 +632,7 @@
 %define filter_x_b m9
 %define filter_rnd m10
 %else    ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; y_offset == 0. We can reuse y_offset reg.
 %define tempq y_offsetq
   add x_offsetq, g_bilin_filterm
@@ -719,11 +719,11 @@
   jne .x_nonhalf_y_nonhalf
 
   ; x_offset == bilin interpolation && y_offset == 0.5
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
   mova                 m8, [bilin_filter+x_offsetq]
   mova                 m9, [bilin_filter+x_offsetq+16]
   mova                m10, [GLOBAL(pw_8)]
@@ -731,7 +731,7 @@
 %define filter_x_b m9
 %define filter_rnd m10
 %else    ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; y_offset == 0.5. We can reuse y_offset reg.
 %define tempq y_offsetq
   add x_offsetq, g_bilin_filterm
@@ -846,12 +846,12 @@
 
 .x_nonhalf_y_nonhalf:
 ; loading filter - this is same as in 8-bit depth
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           x_offsetd, filter_idx_shift ; filter_idx_shift = 5
   shl           y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if AOM_ARCH_X86_64 && mmsize == 16
   mova                 m8, [bilin_filter+x_offsetq]
   mova                 m9, [bilin_filter+x_offsetq+16]
   mova                m10, [bilin_filter+y_offsetq]
@@ -863,7 +863,7 @@
 %define filter_y_b m11
 %define filter_rnd m12
 %else   ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; In this case, there is NO unused register. Used src_stride register. Later,
 ; src_stride has to be loaded from stack when it is needed.
 %define tempq src_strideq
diff --git a/aom_dsp/x86/obmc_intrinsic_ssse3.h b/aom_dsp/x86/obmc_intrinsic_ssse3.h
index 48486c6..27398ff 100644
--- a/aom_dsp/x86/obmc_intrinsic_ssse3.h
+++ b/aom_dsp/x86/obmc_intrinsic_ssse3.h
@@ -24,7 +24,7 @@
 
 static INLINE int64_t xx_hsum_epi64_si64(__m128i v_q) {
   v_q = _mm_add_epi64(v_q, _mm_srli_si128(v_q, 8));
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   return _mm_cvtsi128_si64(v_q);
 #else
   {
diff --git a/aom_dsp/x86/sad4d_sse2.asm b/aom_dsp/x86/sad4d_sse2.asm
index 6696c40..6edad99 100644
--- a/aom_dsp/x86/sad4d_sse2.asm
+++ b/aom_dsp/x86/sad4d_sse2.asm
@@ -200,7 +200,7 @@
 %define use_loop (use_ref_offset || %2 > 4)
 
 %if %3 == 1  ; skip rows
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
 %if use_ref_offset
 cglobal sad_skip_%1x%2x4d, 5, 10, 8, src, src_stride, ref1, ref_stride, res, \
                                      ref2, ref3, ref4, cnt, ref_offset
@@ -227,7 +227,7 @@
 %endif
 %endif
 %else ; normal sad
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
 %if use_ref_offset
 cglobal sad%1x%2x4d, 5, 10, 8, src, src_stride, ref1, ref_stride, res, ref2, \
                                ref3, ref4, cnt, ref_offset
diff --git a/aom_dsp/x86/sad_sse2.asm b/aom_dsp/x86/sad_sse2.asm
index de9845a..dbe8ca3 100644
--- a/aom_dsp/x86/sad_sse2.asm
+++ b/aom_dsp/x86/sad_sse2.asm
@@ -42,11 +42,11 @@
 cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \
                                     second_pred, n_rows
 %else ; %3 == 7
-cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \
+cglobal sad%1x%2_avg, 5, AOM_ARCH_X86_64 + %3, 6, src, src_stride, \
                                               ref, ref_stride, \
                                               second_pred, \
                                               src_stride3, ref_stride3
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
 %define n_rowsd r7d
 %else ; x86-32
 %define n_rowsd dword r0m
diff --git a/aom_dsp/x86/subpel_variance_sse2.asm b/aom_dsp/x86/subpel_variance_sse2.asm
index cbf2890..d1d8373 100644
--- a/aom_dsp/x86/subpel_variance_sse2.asm
+++ b/aom_dsp/x86/subpel_variance_sse2.asm
@@ -98,7 +98,7 @@
 %endmacro
 
 %macro INC_SRC_BY_SRC_STRIDE  0
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
   add                srcq, src_stridemp
 %else
   add                srcq, src_strideq
@@ -117,7 +117,7 @@
 ; 11, not 13, if the registers are ordered correctly. May make a minor speed
 ; difference on Win64
 
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   %if %2 == 1 ; avg
     cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
                                         x_offset, y_offset, dst, dst_stride, \
@@ -355,11 +355,11 @@
 
 .x_zero_y_nonhalf:
   ; x_offset == 0 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
   mova                 m8, [bilin_filter+y_offsetq]
 %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
   mova                 m9, [bilin_filter+y_offsetq+16]
@@ -369,7 +369,7 @@
 %define filter_y_b m9
 %define filter_rnd m10
 %else ; x86-32 or mmx
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; x_offset == 0, reuse x_offset reg
 %define tempq x_offsetq
   add y_offsetq, g_bilin_filterm
@@ -678,11 +678,11 @@
 
 .x_half_y_nonhalf:
   ; x_offset == 0.5 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
   mova                 m8, [bilin_filter+y_offsetq]
 %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
   mova                 m9, [bilin_filter+y_offsetq+16]
@@ -692,7 +692,7 @@
 %define filter_y_b m9
 %define filter_rnd m10
 %else  ;x86_32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; x_offset == 0.5. We can reuse x_offset reg
 %define tempq x_offsetq
   add y_offsetq, g_bilin_filterm
@@ -836,11 +836,11 @@
   jnz .x_nonhalf_y_nonzero
 
   ; x_offset == bilin interpolation && y_offset == 0
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
   mova                 m8, [bilin_filter+x_offsetq]
 %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
   mova                 m9, [bilin_filter+x_offsetq+16]
@@ -850,7 +850,7 @@
 %define filter_x_b m9
 %define filter_rnd m10
 %else    ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ;y_offset == 0. We can reuse y_offset reg.
 %define tempq y_offsetq
   add x_offsetq, g_bilin_filterm
@@ -978,11 +978,11 @@
   jne .x_nonhalf_y_nonhalf
 
   ; x_offset == bilin interpolation && y_offset == 0.5
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
   mova                 m8, [bilin_filter+x_offsetq]
 %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
   mova                 m9, [bilin_filter+x_offsetq+16]
@@ -992,7 +992,7 @@
 %define filter_x_b m9
 %define filter_rnd m10
 %else    ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; y_offset == 0.5. We can reuse y_offset reg.
 %define tempq y_offsetq
   add x_offsetq, g_bilin_filterm
@@ -1176,12 +1176,12 @@
   STORE_AND_RET %1
 
 .x_nonhalf_y_nonhalf:
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   lea        bilin_filter, [GLOBAL(bilin_filter_m)]
 %endif
   shl           x_offsetd, filter_idx_shift
   shl           y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if AOM_ARCH_X86_64 && %1 > 4
   mova                 m8, [bilin_filter+x_offsetq]
 %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
   mova                 m9, [bilin_filter+x_offsetq+16]
@@ -1197,7 +1197,7 @@
 %define filter_y_b m11
 %define filter_rnd m12
 %else   ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if AOM_ARCH_X86=1 && CONFIG_PIC=1
 ; In this case, there is NO unused register. Used src_stride register. Later,
 ; src_stride has to be loaded from stack when it is needed.
 %define tempq src_strideq
diff --git a/aom_dsp/x86/sum_squares_sse2.c b/aom_dsp/x86/sum_squares_sse2.c
index 25be856..cf3ed98 100644
--- a/aom_dsp/x86/sum_squares_sse2.c
+++ b/aom_dsp/x86/sum_squares_sse2.c
@@ -23,7 +23,7 @@
 }
 
 static INLINE uint64_t xx_cvtsi128_si64(__m128i a) {
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   return (uint64_t)_mm_cvtsi128_si64(a);
 #else
   {
diff --git a/aom_ports/x86.h b/aom_ports/x86.h
index d44d386..c089984 100644
--- a/aom_ports/x86.h
+++ b/aom_ports/x86.h
@@ -44,7 +44,7 @@
 } aom_cpu_t;
 
 #if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
 #define cpuid(func, func2, ax, bx, cx, dx)                      \
   __asm__ __volatile__("cpuid           \n\t"                   \
                        : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
@@ -60,7 +60,7 @@
 #endif
 #elif defined(__SUNPRO_C) || \
     defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
 #define cpuid(func, func2, ax, bx, cx, dx)     \
   asm volatile(                                \
       "xchg %rsi, %rbx \n\t"                   \
@@ -80,7 +80,7 @@
       : "a"(func), "c"(func2))
 #endif
 #else /* end __SUNPRO__ */
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
 #if defined(_MSC_VER) && _MSC_VER > 1500
 #define cpuid(func, func2, a, b, c, d) \
   do {                                 \
@@ -258,7 +258,7 @@
   asm volatile("rdtsc\n\t" : "=a"(tsc) :);
   return tsc;
 #else
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   return (unsigned int)__rdtsc();
 #else
   __asm rdtsc;
@@ -276,7 +276,7 @@
   asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
   return ((uint64_t)hi << 32) | lo;
 #else
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   return (uint64_t)__rdtsc();
 #else
   __asm rdtsc;
@@ -298,7 +298,7 @@
   unsigned int ui;
   return (unsigned int)__rdtscp(&ui);
 #else
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   return (unsigned int)__rdtscp();
 #else
   __asm rdtscp;
@@ -336,7 +336,7 @@
 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
 #define x86_pause_hint() asm volatile("pause \n\t")
 #else
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
 #define x86_pause_hint() _mm_pause();
 #else
 #define x86_pause_hint() __asm pause
@@ -361,7 +361,7 @@
   asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
   return mode;
 }
-#elif ARCH_X86_64
+#elif AOM_ARCH_X86_64
 /* No fldcw intrinsics on Windows x64, punt to external asm */
 extern void aom_winx64_fldcw(unsigned short mode);
 extern unsigned short aom_winx64_fstcw(void);
diff --git a/av1/encoder/x86/error_intrin_sse2.c b/av1/encoder/x86/error_intrin_sse2.c
index e876db1..61f65c6 100644
--- a/av1/encoder/x86/error_intrin_sse2.c
+++ b/av1/encoder/x86/error_intrin_sse2.c
@@ -65,11 +65,11 @@
   accum = reduce_sum_epi64(accum);
 
   // Store the results.
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   return _mm_cvtsi128_si64(accum);
 #else
   int64_t result;
   _mm_storel_epi64((__m128i *)&result, accum);
   return result;
-#endif  // ARCH_X86_64
+#endif  // AOM_ARCH_X86_64
 }
diff --git a/av1/encoder/x86/error_sse2.asm b/av1/encoder/x86/error_sse2.asm
index f4b4968..6407c10 100644
--- a/av1/encoder/x86/error_sse2.asm
+++ b/av1/encoder/x86/error_sse2.asm
@@ -75,7 +75,7 @@
   movhlps   m7, m6
   paddq     m4, m5
   paddq     m6, m7
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
   movq    rax, m4
   movq [sszq], m6
 %else
diff --git a/av1/encoder/x86/wedge_utils_avx2.c b/av1/encoder/x86/wedge_utils_avx2.c
index bbc62d5..9cde860 100644
--- a/av1/encoder/x86/wedge_utils_avx2.c
+++ b/av1/encoder/x86/wedge_utils_avx2.c
@@ -72,7 +72,7 @@
   __m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc0_q);
   __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc0_q, 1);
   v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   csse = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0);
 #else
   xx_storel_64(&csse, v_acc_q_0);
@@ -141,7 +141,7 @@
   __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc_q, 1);
   v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
 
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   acc = _mm_extract_epi64(v_acc_q_0, 0);
 #else
   xx_storel_64(&acc, v_acc_q_0);
diff --git a/av1/encoder/x86/wedge_utils_sse2.c b/av1/encoder/x86/wedge_utils_sse2.c
index e665b2e..d7ac222 100644
--- a/av1/encoder/x86/wedge_utils_sse2.c
+++ b/av1/encoder/x86/wedge_utils_sse2.c
@@ -85,7 +85,7 @@
 
   v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_srli_si128(v_acc0_q, 8));
 
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   csse = (uint64_t)_mm_cvtsi128_si64(v_acc0_q);
 #else
   xx_storel_64(&csse, v_acc0_q);
@@ -174,7 +174,7 @@
 
   v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
 
-#if ARCH_X86_64
+#if AOM_ARCH_X86_64
   acc = _mm_cvtsi128_si64(v_acc_q);
 #else
   xx_storel_64(&acc, v_acc_q);
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 726415c..d688e62 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -23,10 +23,10 @@
 set_aom_detect_var(INLINE "" "Sets INLINE value for current target.")
 
 # CPUs.
-set_aom_detect_var(ARCH_ARM 0 "Enables ARM architecture.")
-set_aom_detect_var(ARCH_PPC 0 "Enables PPC architecture.")
-set_aom_detect_var(ARCH_X86 0 "Enables X86 architecture.")
-set_aom_detect_var(ARCH_X86_64 0 "Enables X86_64 architecture.")
+set_aom_detect_var(AOM_ARCH_ARM 0 "Enables ARM architecture.")
+set_aom_detect_var(AOM_ARCH_PPC 0 "Enables PPC architecture.")
+set_aom_detect_var(AOM_ARCH_X86 0 "Enables X86 architecture.")
+set_aom_detect_var(AOM_ARCH_X86_64 0 "Enables X86_64 architecture.")
 
 # ARM feature flags.
 set_aom_detect_var(HAVE_NEON 0 "Enables NEON intrinsics optimizations.")
diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
index 99ac38a..730ad42 100644
--- a/build/cmake/cpu.cmake
+++ b/build/cmake/cpu.cmake
@@ -10,7 +10,7 @@
 #
 
 if("${AOM_TARGET_CPU}" MATCHES "^arm")
-  set(ARCH_ARM 1)
+  set(AOM_ARCH_ARM 1)
   set(RTCD_ARCH_ARM "yes")
 
   if(ENABLE_NEON)
@@ -34,7 +34,7 @@
   endif()
 
 elseif("${AOM_TARGET_CPU}" MATCHES "ppc")
-  set(ARCH_PPC 1)
+  set(AOM_ARCH_PPC 1)
   set(RTCD_ARCH_PPC "yes")
 
   if(ENABLE_VSX)
@@ -46,10 +46,10 @@
   endif()
 elseif("${AOM_TARGET_CPU}" MATCHES "^x86")
   if("${AOM_TARGET_CPU}" STREQUAL "x86")
-    set(ARCH_X86 1)
+    set(AOM_ARCH_X86 1)
     set(RTCD_ARCH_X86 "yes")
   elseif("${AOM_TARGET_CPU}" STREQUAL "x86_64")
-    set(ARCH_X86_64 1)
+    set(AOM_ARCH_X86_64 1)
     set(RTCD_ARCH_X86_64 "yes")
   endif()
 
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index d5232ee..2ce3a20 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -727,14 +727,14 @@
     aom_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
                       filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
   }
-#if HAVE_SSE2 && ARCH_X86_64
+#if HAVE_SSE2 && AOM_ARCH_X86_64
 WRAP(convolve8_horiz_sse2, 8)
 WRAP(convolve8_vert_sse2, 8)
 WRAP(convolve8_horiz_sse2, 10)
 WRAP(convolve8_vert_sse2, 10)
 WRAP(convolve8_horiz_sse2, 12)
 WRAP(convolve8_vert_sse2, 12)
-#endif  // HAVE_SSE2 && ARCH_X86_64
+#endif  // HAVE_SSE2 && AOM_ARCH_X86_64
 
 WRAP(convolve8_horiz_c, 8)
 WRAP(convolve8_vert_c, 8)
@@ -776,7 +776,7 @@
 INSTANTIATE_TEST_SUITE_P(C, ConvolveTest,
                          ::testing::ValuesIn(kArrayConvolve_c));
 
-#if HAVE_SSE2 && ARCH_X86_64
+#if HAVE_SSE2 && AOM_ARCH_X86_64
 #if CONFIG_AV1_HIGHBITDEPTH
 const ConvolveFunctions wrap_convolve8_sse2(wrap_convolve8_horiz_sse2_8,
                                             wrap_convolve8_vert_sse2_8, 8);
diff --git a/test/fft_test.cc b/test/fft_test.cc
index 04d047d..5443c99 100644
--- a/test/fft_test.cc
+++ b/test/fft_test.cc
@@ -147,7 +147,7 @@
                                            FFTTestArg(16, aom_fft16x16_float_c),
                                            FFTTestArg(32,
                                                       aom_fft32x32_float_c)));
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
 #if HAVE_SSE2
 INSTANTIATE_TEST_SUITE_P(
     SSE2, FFT2DTest,
@@ -163,7 +163,7 @@
                       FFTTestArg(16, aom_fft16x16_float_avx2),
                       FFTTestArg(32, aom_fft32x32_float_avx2)));
 #endif  // HAVE_AVX2
-#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // AOM_ARCH_X86 || AOM_ARCH_X86_64
 
 struct IFFTTestArg {
   int n;
@@ -246,7 +246,7 @@
                       IFFTTestArg(8, aom_ifft8x8_float_c),
                       IFFTTestArg(16, aom_ifft16x16_float_c),
                       IFFTTestArg(32, aom_ifft32x32_float_c)));
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
 #if HAVE_SSE2
 INSTANTIATE_TEST_SUITE_P(
     SSE2, IFFT2DTest,
@@ -263,6 +263,6 @@
                       IFFTTestArg(16, aom_ifft16x16_float_avx2),
                       IFFTTestArg(32, aom_ifft32x32_float_avx2)));
 #endif  // HAVE_AVX2
-#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // AOM_ARCH_X86 || AOM_ARCH_X86_64
 
 }  // namespace
diff --git a/test/quantize_func_test.cc b/test/quantize_func_test.cc
index 6f58898..04e8306 100644
--- a/test/quantize_func_test.cc
+++ b/test/quantize_func_test.cc
@@ -768,7 +768,7 @@
                          ::testing::ValuesIn(kQParamArrayNEON));
 #endif
 
-#if HAVE_SSSE3 && ARCH_X86_64
+#if HAVE_SSSE3 && AOM_ARCH_X86_64
 INSTANTIATE_TEST_SUITE_P(
     SSSE3, FullPrecisionQuantizeTest,
     ::testing::Values(
@@ -779,7 +779,7 @@
         make_tuple(&aom_quantize_b_64x64_c, &aom_quantize_b_64x64_ssse3,
                    static_cast<TX_SIZE>(TX_64X64), TYPE_B, AOM_BITS_8)));
 
-#endif  // HAVE_SSSE3 && ARCH_X86_64
+#endif  // HAVE_SSSE3 && AOM_ARCH_X86_64
 
 #if HAVE_AVX
 INSTANTIATE_TEST_SUITE_P(
diff --git a/test/register_state_check.h b/test/register_state_check.h
index 0a150c3..4aad814 100644
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -26,7 +26,7 @@
 //   See platform implementations of RegisterStateCheck and
 //   RegisterStateCheckMMX for details.
 
-#if defined(_WIN64) && ARCH_X86_64
+#if defined(_WIN64) && AOM_ARCH_X86_64
 
 #undef NOMINMAX
 #define NOMINMAX
@@ -86,9 +86,9 @@
 class RegisterStateCheck {};
 }  // namespace libaom_test
 
-#endif  // _WIN64 && ARCH_X86_64
+#endif  // _WIN64 && AOM_ARCH_X86_64
 
-#if (ARCH_X86 || ARCH_X86_64) && defined(__GNUC__)
+#if (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__)
 namespace libaom_test {
 
 // Checks the FPU tag word pre/post execution to ensure emms has been called.
@@ -122,7 +122,7 @@
 class RegisterStateCheckMMX {};
 }  // namespace libaom_test
 
-#endif  // (ARCH_X86 || ARCH_X86_64) && defined(__GNUC__)
+#endif  // (AOM_ARCH_X86 || AOM_ARCH_X86_64) && defined(__GNUC__)
 
 #define API_REGISTER_STATE_CHECK(statement)           \
   do {                                                \
diff --git a/test/test_libaom.cc b/test/test_libaom.cc
index 462d877..6ffbbc5 100644
--- a/test/test_libaom.cc
+++ b/test/test_libaom.cc
@@ -17,7 +17,7 @@
 
 #include "config/aom_config.h"
 
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
 #include "aom_ports/x86.h"
 #endif
 extern "C" {
@@ -26,7 +26,7 @@
 extern void aom_scale_rtcd();
 }
 
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
 static void append_negative_gtest_filter(const char *str) {
   std::string flag_value = GTEST_FLAG_GET(filter);
   // Negative patterns begin with one '-' followed by a ':' separated list.
@@ -44,12 +44,12 @@
   }
   GTEST_FLAG_SET(filter, flag_value);
 }
-#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // AOM_ARCH_X86 || AOM_ARCH_X86_64
 
 int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);
 
-#if ARCH_X86 || ARCH_X86_64
+#if AOM_ARCH_X86 || AOM_ARCH_X86_64
   const int simd_caps = x86_simd_caps();
   if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter("MMX");
   if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter("SSE");
@@ -60,7 +60,7 @@
   if (!(simd_caps & HAS_SSE4_2)) append_negative_gtest_filter("SSE4_2");
   if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter("AVX");
   if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter("AVX2");
-#endif  // ARCH_X86 || ARCH_X86_64
+#endif  // AOM_ARCH_X86 || AOM_ARCH_X86_64
 
 // Shared library builds don't support whitebox tests that exercise internal
 // symbols.
diff --git a/third_party/x86inc/README.libaom b/third_party/x86inc/README.libaom
index 2f3e5c2..6b92358 100644
--- a/third_party/x86inc/README.libaom
+++ b/third_party/x86inc/README.libaom
@@ -16,3 +16,4 @@
 Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
 Use .text with no alignment for aout.
 Only use 'hidden' visibility with Chromium.
+Prefix ARCH_* with AOM_.
diff --git a/third_party/x86inc/x86inc.asm b/third_party/x86inc/x86inc.asm
index e48d644..b0421f5 100644
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -45,7 +45,7 @@
 %endif
 
 %ifndef STACK_ALIGNMENT
-    %if ARCH_X86_64
+    %if AOM_ARCH_X86_64
         %define STACK_ALIGNMENT 16
     %else
         %define STACK_ALIGNMENT 4
@@ -54,7 +54,7 @@
 
 %define WIN64  0
 %define UNIX64 0
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
     %ifidn __OUTPUT_FORMAT__,win32
         %define WIN64  1
     %elifidn __OUTPUT_FORMAT__,win64
@@ -168,7 +168,7 @@
         %endif
     %endif
 
-    %if ARCH_X86_64 == 0
+    %if AOM_ARCH_X86_64 == 0
         %undef PIC
     %endif
 
@@ -277,7 +277,7 @@
     %if %0 == 2
         %define r%1m  %2d
         %define r%1mp %2
-    %elif ARCH_X86_64 ; memory
+    %elif AOM_ARCH_X86_64 ; memory
         %define r%1m [rstk + stack_offset + %3]
         %define r%1mp qword r %+ %1 %+ m
     %else
@@ -298,7 +298,7 @@
     %define e%1h %3
     %define r%1b %2
     %define e%1b %2
-    %if ARCH_X86_64 == 0
+    %if AOM_ARCH_X86_64 == 0
         %define r%1 e%1
     %endif
 %endmacro
@@ -335,14 +335,14 @@
 
 DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
     %define gprsize 8
 %else
     %define gprsize 4
 %endif
 
 %macro LEA 2
-%if ARCH_X86_64
+%if AOM_ARCH_X86_64
     lea %1, [%2]
 %elif PIC
     call $+5 ; special-cased to not affect the RSB on most CPU:s
@@ -414,7 +414,7 @@
     %endif
 %endmacro
 
-%if ARCH_X86_64 == 0
+%if AOM_ARCH_X86_64 == 0
     %define movsxd movifnidn
 %endif
 
@@ -466,7 +466,7 @@
 %endmacro
 
 %define required_stack_alignment ((mmsize + 15) & ~15)
-%define vzeroupper_required (mmsize > 16 && (ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
+%define vzeroupper_required (mmsize > 16 && (AOM_ARCH_X86_64 == 0 || xmm_regs_used > 16 || notcpuflag(avx512)))
 %define high_mm_regs (16*cpuflag(avx512))
 
 %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only)
@@ -521,13 +521,13 @@
                 ; Reserve an additional register for storing the original stack pointer, but avoid using
                 ; eax/rax for this purpose since it can potentially get overwritten as a return value.
                 %assign regs_used (regs_used + 1)
-                %if ARCH_X86_64 && regs_used == 7
+                %if AOM_ARCH_X86_64 && regs_used == 7
                     %assign regs_used 8
-                %elif ARCH_X86_64 == 0 && regs_used == 1
+                %elif AOM_ARCH_X86_64 == 0 && regs_used == 1
                     %assign regs_used 2
                 %endif
             %endif
-            %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
+            %if AOM_ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
                 ; Ensure that we don't clobber any registers containing arguments. For UNIX64 we also preserve r6 (rax)
                 ; since it's used as a hidden argument in vararg functions to specify the number of vector registers used.
                 %assign regs_used 5 + UNIX64 * 3
@@ -654,7 +654,7 @@
     AUTO_REP_RET
 %endmacro
 
-%elif ARCH_X86_64 ; *nix x64 ;=============================================
+%elif AOM_ARCH_X86_64 ; *nix x64 ;=============================================
 
 DECLARE_REG 0,  rdi
 DECLARE_REG 1,  rsi
@@ -1002,7 +1002,7 @@
         %endif
     %endif
 
-    %if ARCH_X86_64 || cpuflag(sse2)
+    %if AOM_ARCH_X86_64 || cpuflag(sse2)
         %ifdef __NASM_VER__
             ALIGNMODE p6
         %else
@@ -1039,7 +1039,7 @@
     %endif
 
     %assign num_mmregs 8
-    %if ARCH_X86_64 && mmsize >= 16
+    %if AOM_ARCH_X86_64 && mmsize >= 16
         %assign num_mmregs 16
         %if cpuflag(avx512) || mmsize == 64
             %assign num_mmregs 32
@@ -1064,7 +1064,7 @@
 
 ; Prefer registers 16-31 over 0-15 to avoid having to use vzeroupper
 %macro AVX512_MM_PERMUTATION 0-1 0 ; start_reg
-    %if ARCH_X86_64 && cpuflag(avx512)
+    %if AOM_ARCH_X86_64 && cpuflag(avx512)
         %assign %%i %1
         %rep 16-%1
             %assign %%i_high %%i+16