s/__aarch64__/AOM_ARCH_AARCH64/
This allows AArch64 to be correctly detected when building with Visual
Studio (cl.exe). There are still test failures, however.
Microsoft's compiler doesn't define __ARM_FEATURE_*. To use those paths
we may need to rely on _M_ARM64_EXTENSION.
This change is similar to the one in libvpx:
57b9afa58 s/__aarch64__/VPX_ARCH_AARCH64/
Bug: b/277255390
Change-Id: Ie3174d59c1bcdab5677b81b0176cc363cf18018f
diff --git a/aom_dsp/arm/avg_neon.c b/aom_dsp/arm/avg_neon.c
index 2da64b4..1bb58e4 100644
--- a/aom_dsp/arm/avg_neon.c
+++ b/aom_dsp/arm/avg_neon.c
@@ -18,7 +18,7 @@
#include "aom_dsp/arm/transpose_neon.h"
#include "aom_ports/mem.h"
-#if !defined(__aarch64__)
+#if !AOM_ARCH_AARCH64
static INLINE uint32x2_t horizontal_add_u16x8_v(const uint16x8_t a) {
const uint32x4_t b = vpaddlq_u16(a);
const uint64x2_t c = vpaddlq_u32(b);
@@ -30,7 +30,7 @@
unsigned int aom_avg_4x4_neon(const uint8_t *a, int a_stride) {
const uint8x16_t b = load_unaligned_u8q(a, a_stride);
const uint16x8_t c = vaddl_u8(vget_low_u8(b), vget_high_u8(b));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const uint32_t d = vaddlvq_u16(c);
return (d + 8) >> 4;
#else
@@ -53,7 +53,7 @@
sum = vaddw_u8(sum, e);
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const uint32_t d = vaddlvq_u16(sum);
return (d + 32) >> 6;
#else
@@ -216,7 +216,7 @@
v_mean = vpadalq_s16(v_mean, diff);
v_low = vget_low_s16(diff);
v_sse = vmlal_s16(v_sse, v_low, v_low);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
v_sse = vmlal_high_s16(v_sse, diff, diff);
#else
const int16x4_t v_high = vget_high_s16(diff);
@@ -259,7 +259,7 @@
const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max);
const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
*min = *max = 0; // Clear high bits
*((uint8_t *)max) = vmaxvq_u8(ab07_max);
*((uint8_t *)min) = vminvq_u8(ab07_min);
diff --git a/aom_dsp/arm/highbd_avg_neon.c b/aom_dsp/arm/highbd_avg_neon.c
index 0483a83..13cce03 100644
--- a/aom_dsp/arm/highbd_avg_neon.c
+++ b/aom_dsp/arm/highbd_avg_neon.c
@@ -98,7 +98,7 @@
const uint16x8_t min4567 = vminq_u16(min45, min67);
const uint16x8_t min07 = vminq_u16(min0123, min4567);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
*max = (int)vmaxvq_u16(max07);
*min = (int)vminvq_u16(min07);
#else
diff --git a/aom_dsp/arm/highbd_loopfilter_neon.c b/aom_dsp/arm/highbd_loopfilter_neon.c
index 0b720ce..2b5128e 100644
--- a/aom_dsp/arm/highbd_loopfilter_neon.c
+++ b/aom_dsp/arm/highbd_loopfilter_neon.c
@@ -247,12 +247,12 @@
filter4_masks(p0q0, p1q1, hev_thresh, outer_mask, inner_thresh, &hev_mask,
&needs_filter4_mask);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (vaddv_u16(needs_filter4_mask) == 0) {
// None of the values will be filtered.
return;
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Copy the masks to the high bits for packed comparisons later.
const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
@@ -313,12 +313,12 @@
filter4_masks(p0q0, p1q1, hev_thresh, outer_mask, inner_thresh, &hev_mask,
&needs_filter4_mask);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (vaddv_u16(needs_filter4_mask) == 0) {
// None of the values will be filtered.
return;
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Copy the masks to the high bits for packed comparisons later.
const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
@@ -437,12 +437,12 @@
filter6_masks(p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh, bd,
&needs_filter_mask, &is_flat3_mask, &hev_mask);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (vaddv_u16(needs_filter_mask) == 0) {
// None of the values will be filtered.
return;
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Copy the masks to the high bits for packed comparisons later.
const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
@@ -528,12 +528,12 @@
filter6_masks(p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh, bd,
&needs_filter_mask, &is_flat3_mask, &hev_mask);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (vaddv_u16(needs_filter_mask) == 0) {
// None of the values will be filtered.
return;
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Copy the masks to the high bits for packed comparisons later.
const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
@@ -684,12 +684,12 @@
filter8_masks(p3q3, p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
bd, &needs_filter_mask, &is_flat4_mask, &hev_mask);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (vaddv_u16(needs_filter_mask) == 0) {
// None of the values will be filtered.
return;
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Copy the masks to the high bits for packed comparisons later.
const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
@@ -783,12 +783,12 @@
filter8_masks(p3q3, p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
bd, &needs_filter_mask, &is_flat4_mask, &hev_mask);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (vaddv_u16(needs_filter_mask) == 0) {
// None of the values will be filtered.
return;
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Copy the masks to the high bits for packed comparisons later.
const uint16x8_t hev_mask_8 = vcombine_u16(hev_mask, hev_mask);
@@ -976,12 +976,12 @@
filter8_masks(p3q3, p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
bd, &needs_filter_mask, &is_flat4_mask, &hev_mask);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (vaddv_u16(needs_filter_mask) == 0) {
// None of the values will be filtered.
return;
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
const uint16x8_t p4q4 = vcombine_u16(src[2], src[11]);
const uint16x8_t p5q5 = vcombine_u16(src[1], src[12]);
const uint16x8_t p6q6 = vcombine_u16(src[0], src[13]);
@@ -1083,7 +1083,7 @@
static INLINE uint16x8x2_t permute_acdb64(const uint16x8_t ab,
const uint16x8_t cd) {
uint16x8x2_t acdb;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
// a[b] <- [c]d
acdb.val[0] = vreinterpretq_u16_u64(
vtrn1q_u64(vreinterpretq_u64_u16(ab), vreinterpretq_u64_u16(cd)));
@@ -1099,7 +1099,7 @@
acdb.val[1] = vreinterpretq_u16_u64(
vsetq_lane_u64(vgetq_lane_u64(vreinterpretq_u64_u16(cd), 1),
vreinterpretq_u64_u16(ab), 0));
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
return acdb;
}
@@ -1144,12 +1144,12 @@
filter8_masks(p3q3, p2q2, p1q1, p0q0, hev_thresh, outer_mask, inner_thresh,
bd, &needs_filter_mask, &is_flat4_mask, &hev_mask);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (vaddv_u16(needs_filter_mask) == 0) {
// None of the values will be filtered.
return;
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
const uint16x8_t p4q4 =
vcombine_u16(vget_low_u16(src_p[3]), vget_high_u16(src_q[0]));
const uint16x8_t p5q5 =
diff --git a/aom_dsp/arm/highbd_quantize_neon.c b/aom_dsp/arm/highbd_quantize_neon.c
index 927e13c..1b7e046 100644
--- a/aom_dsp/arm/highbd_quantize_neon.c
+++ b/aom_dsp/arm/highbd_quantize_neon.c
@@ -19,7 +19,7 @@
#include "av1/encoder/av1_quantize.h"
static INLINE uint32_t sum_abs_coeff(const uint32x4_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddvq_u32(a);
#else
const uint64x2_t b = vpaddlq_u32(a);
@@ -98,7 +98,7 @@
}
static INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
-#ifdef __aarch64__
+#if AOM_ARCH_AARCH64
return (uint16_t)vmaxvq_s16(v_eobmax);
#else
const int16x4_t v_eobmax_3210 =
@@ -116,7 +116,7 @@
}
static INLINE uint16_t get_min_eob(int16x8_t v_eobmin) {
-#ifdef __aarch64__
+#if AOM_ARCH_AARCH64
return (uint16_t)vminvq_s16(v_eobmin);
#else
const int16x4_t v_eobmin_3210 =
diff --git a/aom_dsp/arm/intrapred_neon.c b/aom_dsp/arm/intrapred_neon.c
index 58b31a3..2161378 100644
--- a/aom_dsp/arm/intrapred_neon.c
+++ b/aom_dsp/arm/intrapred_neon.c
@@ -84,7 +84,7 @@
}
static INLINE uint16x8_t horizontal_add_and_broadcast_u16x8(uint16x8_t a) {
-#ifdef __aarch64__
+#if AOM_ARCH_AARCH64
// On AArch64 we could also use vdupq_n_u16(vaddvq_u16(a)) here to save an
// instruction, however the addv instruction is usually slightly more
// expensive than a pairwise addition, so the need for immediately
@@ -1528,13 +1528,13 @@
int16x4_t v_frac_bits_y = vdup_n_s16(-frac_bits_y);
int16x4_t min_base_y64 = vdup_n_s16(min_base_y);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
// Use ext rather than loading left + 14 directly to avoid over-read.
const uint8x16_t left_m2 = vld1q_u8(left - 2);
const uint8x16_t left_0 = vld1q_u8(left);
const uint8x16_t left_14 = vextq_u8(left_0, left_0, 14);
const uint8x16x2_t left_vals = { { left_m2, left_14 } };
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
for (int r = 0; r < N; r++) {
uint16x8_t res, shift;
@@ -1593,13 +1593,13 @@
// Values in base_y_c64 range from -2 through 14 inclusive.
base_y_c64 = vbic_s16(base_y_c64, vreinterpret_s16_u16(mask64));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x8_t left_idx0 = vreinterpret_u8_s16(base_y_c64 + 2); // [0, 16]
uint8x8_t left_idx1 = vreinterpret_u8_s16(base_y_c64 + 3); // [1, 17]
uint8x8_t a0_y = vtrn1_u8(vqtbl2_u8(left_vals, left_idx0), v_zero_u8);
uint8x8_t a1_y = vtrn1_u8(vqtbl2_u8(left_vals, left_idx1), v_zero_u8);
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
DECLARE_ALIGNED(32, int16_t, base_y_c[4]);
vst1_s16(base_y_c, base_y_c64);
@@ -1616,7 +1616,7 @@
a1_y = vld1_lane_u8(left + base_y_c[1], a1_y, 2);
a1_y = vld1_lane_u8(left + base_y_c[2], a1_y, 4);
a1_y = vld1_lane_u8(left + base_y_c[3], a1_y, 6);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
if (upsample_left) {
v_shift.val[1] = vshr_n_u16(
@@ -1696,7 +1696,7 @@
uint16x8_t c1234 = vcombine_u16(vcreate_u16(0x0004000300020001),
vcreate_u16(0x0008000700060005));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
// Use ext rather than loading left + 30 directly to avoid over-read.
const uint8x16_t left_m2 = vld1q_u8(left - 2);
const uint8x16_t left_0 = vld1q_u8(left + 0);
@@ -1704,7 +1704,7 @@
const uint8x16_t left_14 = vextq_u8(left_0, left_16, 14);
const uint8x16_t left_30 = vextq_u8(left_16, left_16, 14);
const uint8x16x3_t left_vals = { { left_m2, left_14, left_30 } };
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
for (int r = 0; r < N; r++) {
uint8x8_t resx, resy, resxy;
@@ -1776,7 +1776,7 @@
// Values in base_y_c128 range from -2 through 31 inclusive.
base_y_c128 = vbicq_s16(base_y_c128, vreinterpretq_s16_u16(mask128));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x16_t left_idx0 = vreinterpretq_u8_s16(base_y_c128 + 2); // [0, 33]
uint8x16_t left_idx1 = vreinterpretq_u8_s16(base_y_c128 + 3); // [1, 34]
uint8x16_t left_idx01 = vuzp1q_u8(left_idx0, left_idx1);
@@ -1784,7 +1784,7 @@
uint8x16_t a01_x = vqtbl3q_u8(left_vals, left_idx01);
uint8x8_t a0_x1 = vget_low_u8(a01_x);
uint8x8_t a1_x1 = vget_high_u8(a01_x);
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
DECLARE_ALIGNED(32, int16_t, base_y_c[16]);
vst1q_s16(base_y_c, base_y_c128);
@@ -1809,7 +1809,7 @@
a1_x1 = vld1_lane_u8(left + base_y_c[5], a1_x1, 5);
a1_x1 = vld1_lane_u8(left + base_y_c[6], a1_x1, 6);
a1_x1 = vld1_lane_u8(left + base_y_c[7], a1_x1, 7);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
if (upsample_left) {
shift.val[1] = vshrq_n_u16(
@@ -1867,7 +1867,7 @@
c1234.val[0] = vaddq_u16(c0123.val[0], c1);
c1234.val[1] = vaddq_u16(c0123.val[1], c1);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const uint8x16_t left_m1 = vld1q_u8(left - 1);
const uint8x16_t left_0 = vld1q_u8(left + 0);
const uint8x16_t left_16 = vld1q_u8(left + 16);
@@ -1878,7 +1878,7 @@
const uint8x16_t left_47 = vextq_u8(left_32, left_48, 15);
const uint8x16x4_t left_vals0 = { { left_m1, left_15, left_31, left_47 } };
const uint8x16x4_t left_vals1 = { { left_0, left_16, left_32, left_48 } };
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
for (int r = 0; r < H; r++) {
uint16x8x2_t res, r6, shift;
@@ -1991,7 +1991,7 @@
a0_y128 = vandq_u8(a0_y128, v_loadmaskz2);
a1_y128 = vld1q_u8(left + min_y + 1);
a1_y128 = vandq_u8(a1_y128, v_loadmaskz2);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
a0_y128 = vqtbl1q_u8(a0_y128, vreinterpretq_u8_s8(base_y_offset128));
a1_y128 = vqtbl1q_u8(a1_y128, vreinterpretq_u8_s8(base_y_offset128));
#else
@@ -2023,7 +2023,7 @@
base_y_c256.val[1] = vbicq_s16(base_y_c256.val[1],
vreinterpretq_s16_u16(mask256.val[1]));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
// Values in left_idx{0,1} range from 0 through 63 inclusive.
uint8x16_t left_idx0 = vreinterpretq_u8_s16(base_y_c256.val[0] + 1);
uint8x16_t left_idx1 = vreinterpretq_u8_s16(base_y_c256.val[1] + 1);
@@ -2037,7 +2037,7 @@
a0_y1 = vget_high_u8(a0_y01);
a1_y0 = vget_low_u8(a1_y01);
a1_y1 = vget_high_u8(a1_y01);
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
DECLARE_ALIGNED(32, int16_t, base_y_c[16]);
vst1q_s16(base_y_c, base_y_c256.val[0]);
@@ -2086,7 +2086,7 @@
a1_y1 = vld1_lane_u8(left + base_y_c[13], a1_y1, 5);
a1_y1 = vld1_lane_u8(left + base_y_c[14], a1_y1, 6);
a1_y1 = vld1_lane_u8(left + base_y_c[15], a1_y1, 7);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
}
shifty.val[0] = vshrq_n_u16(
@@ -2318,7 +2318,7 @@
w11 = vzipq_u32(vreinterpretq_u32_u16(w6.val[1]),
vreinterpretq_u32_u16(w7.val[1]));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
d[0] = vzip1q_u64(vreinterpretq_u64_u32(w8.val[0]),
vreinterpretq_u64_u32(w9.val[0]));
d[1] = vzip2q_u64(vreinterpretq_u64_u32(w8.val[0]),
@@ -2388,7 +2388,7 @@
w15 = vzipq_u32(vreinterpretq_u32_u16(w10.val[1]),
vreinterpretq_u32_u16(w11.val[1]));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
d[0] = vzip1q_u64(vreinterpretq_u64_u32(w12.val[0]),
vreinterpretq_u64_u32(w13.val[0]));
d[1] = vzip2q_u64(vreinterpretq_u64_u32(w12.val[0]),
@@ -2443,7 +2443,7 @@
w15 = vzipq_u32(vreinterpretq_u32_u16(w10.val[1]),
vreinterpretq_u32_u16(w11.val[1]));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
d[8] = vzip1q_u64(vreinterpretq_u64_u32(w12.val[0]),
vreinterpretq_u64_u32(w13.val[0]));
d[9] = vzip2q_u64(vreinterpretq_u64_u32(w12.val[0]),
@@ -2516,7 +2516,7 @@
// Store first 4-line result
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
d[0].val[0] = vzip1q_u64(vreinterpretq_u64_u32(w6.val[0]),
vreinterpretq_u64_u32(w14.val[0]));
d[0].val[1] = vzip2q_u64(vreinterpretq_u64_u32(w6.val[0]),
@@ -2572,7 +2572,7 @@
// Store second 4-line result
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
d[4].val[0] = vzip1q_u64(vreinterpretq_u64_u32(w6.val[0]),
vreinterpretq_u64_u32(w14.val[0]));
d[4].val[1] = vzip2q_u64(vreinterpretq_u64_u32(w6.val[0]),
@@ -2639,7 +2639,7 @@
// Store first 4-line result
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
d[8].val[0] = vzip1q_u64(vreinterpretq_u64_u32(w6.val[0]),
vreinterpretq_u64_u32(w14.val[0]));
d[8].val[1] = vzip2q_u64(vreinterpretq_u64_u32(w6.val[0]),
@@ -2695,7 +2695,7 @@
// Store second 4-line result
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
d[12].val[0] = vzip1q_u64(vreinterpretq_u64_u32(w6.val[0]),
vreinterpretq_u64_u32(w14.val[0]));
d[12].val[1] = vzip2q_u64(vreinterpretq_u64_u32(w6.val[0]),
diff --git a/aom_dsp/arm/obmc_sad_neon.c b/aom_dsp/arm/obmc_sad_neon.c
index 60656e5..a692cbb 100644
--- a/aom_dsp/arm/obmc_sad_neon.c
+++ b/aom_dsp/arm/obmc_sad_neon.c
@@ -37,7 +37,7 @@
*sum = vrsraq_n_u32(*sum, abs_hi, 12);
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
// Use tbl for doing a double-width zero extension from 8->32 bits since we can
// do this in one instruction rather than two (indices out of range (255 here)
@@ -110,7 +110,7 @@
return horizontal_add_u32x4(vaddq_u32(sum[0], sum[1]));
}
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
static INLINE unsigned int obmc_sad_large_neon(const uint8_t *ref,
int ref_stride,
@@ -144,7 +144,7 @@
return horizontal_add_u32x4(sum);
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
static INLINE unsigned int obmc_sad_128xh_neon(const uint8_t *ref,
int ref_stride,
diff --git a/aom_dsp/arm/obmc_variance_neon.c b/aom_dsp/arm/obmc_variance_neon.c
index 8702ba6..50cd5f3 100644
--- a/aom_dsp/arm/obmc_variance_neon.c
+++ b/aom_dsp/arm/obmc_variance_neon.c
@@ -55,7 +55,7 @@
*ssev = vmlaq_s32(*ssev, round_s32_hi, round_s32_hi);
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
// Use tbl for doing a double-width zero extension from 8->32 bits since we can
// do this in one instruction rather than two (indices out of range (255 here)
@@ -140,7 +140,7 @@
*sum = horizontal_add_s32x4(sumv);
}
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
static INLINE void obmc_variance_large_neon(const uint8_t *pre, int pre_stride,
const int32_t *wsrc,
@@ -180,7 +180,7 @@
*sum = horizontal_add_s32x4(sumv);
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
static INLINE void obmc_variance_neon_128xh(const uint8_t *pre, int pre_stride,
const int32_t *wsrc,
diff --git a/aom_dsp/arm/sum_neon.h b/aom_dsp/arm/sum_neon.h
index 7d2f18b..ff68c12 100644
--- a/aom_dsp/arm/sum_neon.h
+++ b/aom_dsp/arm/sum_neon.h
@@ -15,7 +15,7 @@
#include "aom_ports/mem.h"
static INLINE int horizontal_add_s16x8(const int16x8_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_s16(a);
#else
const int32x4_t b = vpaddlq_s16(a);
@@ -27,7 +27,7 @@
}
static INLINE int horizontal_add_s32x4(const int32x4_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddvq_s32(a);
#else
const int64x2_t b = vpaddlq_s32(a);
@@ -38,7 +38,7 @@
}
static INLINE int64_t horizontal_add_s64x2(const int64x2_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddvq_s64(a);
#else
return vgetq_lane_s64(a, 0) + vgetq_lane_s64(a, 1);
@@ -46,7 +46,7 @@
}
static INLINE uint64_t horizontal_add_u64x2(const uint64x2_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddvq_u64(a);
#else
return vgetq_lane_u64(a, 0) + vgetq_lane_u64(a, 1);
@@ -54,7 +54,7 @@
}
static INLINE uint64_t horizontal_long_add_u32x4(const uint32x4_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_u32(a);
#else
const uint64x2_t b = vpaddlq_u32(a);
@@ -63,7 +63,7 @@
}
static INLINE unsigned int horizontal_add_u32x4(const uint32x4_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddvq_u32(a);
#else
const uint64x2_t b = vpaddlq_u32(a);
@@ -74,7 +74,7 @@
}
static INLINE uint32x4_t horizontal_add_4d_u32x4(const uint32x4_t sum[4]) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint32x4_t res01 = vpaddq_u32(sum[0], sum[1]);
uint32x4_t res23 = vpaddq_u32(sum[2], sum[3]);
return vpaddq_u32(res01, res23);
@@ -90,7 +90,7 @@
static INLINE uint32_t horizontal_long_add_u16x8(const uint16x8_t vec_lo,
const uint16x8_t vec_hi) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_u16(vec_lo) + vaddlvq_u16(vec_hi);
#else
const uint32x4_t vec_l_lo =
@@ -115,7 +115,7 @@
const uint32x4_t b1 = vpadalq_u16(a1, sum_hi[1]);
const uint32x4_t b2 = vpadalq_u16(a2, sum_hi[2]);
const uint32x4_t b3 = vpadalq_u16(a3, sum_hi[3]);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const uint32x4_t c0 = vpaddq_u32(b0, b1);
const uint32x4_t c1 = vpaddq_u32(b2, b3);
return vpaddq_u32(c0, c1);
@@ -131,7 +131,7 @@
}
static INLINE uint32_t horizontal_add_u16x8(const uint16x8_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_u16(a);
#else
const uint32x4_t b = vpaddlq_u16(a);
@@ -143,7 +143,7 @@
}
static INLINE uint32x4_t horizontal_add_4d_u16x8(const uint16x8_t sum[4]) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
const uint16x8_t b0 = vpaddq_u16(a0, a1);
@@ -160,7 +160,7 @@
}
static INLINE uint32_t horizontal_add_u32x2(const uint32x2_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddv_u32(a);
#else
const uint64x1_t b = vpaddl_u32(a);
@@ -169,7 +169,7 @@
}
static INLINE uint64_t horizontal_long_add_u32x2(const uint32x2_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlv_u32(a);
#else
const uint64x1_t b = vpaddl_u32(a);
@@ -178,7 +178,7 @@
}
static INLINE uint32_t horizontal_add_u16x4(const uint16x4_t a) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlv_u16(a);
#else
const uint32x2_t b = vpaddl_u16(a);
diff --git a/aom_dsp/arm/transpose_neon.h b/aom_dsp/arm/transpose_neon.h
index d151c58..2dbb414 100644
--- a/aom_dsp/arm/transpose_neon.h
+++ b/aom_dsp/arm/transpose_neon.h
@@ -260,7 +260,7 @@
static INLINE uint16x8x2_t aom_vtrnq_u64_to_u16(uint32x4_t a0, uint32x4_t a1) {
uint16x8x2_t b0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
b0.val[0] = vreinterpretq_u16_u64(
vtrn1q_u64(vreinterpretq_u64_u32(a0), vreinterpretq_u64_u32(a1)));
b0.val[1] = vreinterpretq_u16_u64(
@@ -521,7 +521,7 @@
static INLINE int16x8x2_t aom_vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1) {
int16x8x2_t b0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
b0.val[0] = vreinterpretq_s16_s64(
vtrn1q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1)));
b0.val[1] = vreinterpretq_s16_s64(
@@ -744,7 +744,7 @@
static INLINE int32x4x2_t aom_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
int32x4x2_t b0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
b0.val[0] = vreinterpretq_s32_s64(
vtrn1q_s64(vreinterpretq_s64_s32(a0), vreinterpretq_s64_s32(a1)));
b0.val[1] = vreinterpretq_s32_s64(
diff --git a/aom_dsp/simd/v128_intrinsics_arm.h b/aom_dsp/simd/v128_intrinsics_arm.h
index fb89d60..eee150a 100644
--- a/aom_dsp/simd/v128_intrinsics_arm.h
+++ b/aom_dsp/simd/v128_intrinsics_arm.h
@@ -97,7 +97,7 @@
int16x8_t t2 = vmulq_s16(
vmovl_s8(vreinterpret_s8_s64(vget_high_s64(a))),
vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(vget_high_s64(b)))));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_s16(t1) + vaddlvq_s16(t2);
#else
int64x2_t t = vpaddlq_s32(vaddq_s32(vpaddlq_s16(t1), vpaddlq_s16(t2)));
@@ -117,7 +117,7 @@
}
SIMD_INLINE uint64_t v128_hadd_u8(v128 x) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_u8(vreinterpretq_u8_s64(x));
#else
uint64x2_t t = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vreinterpretq_u8_s64(x))));
@@ -155,7 +155,7 @@
}
SIMD_INLINE uint32_t v128_sad_u8_sum(sad128_internal s) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_u16(s.hi) + vaddlvq_u16(s.lo);
#else
uint64x2_t t = vpaddlq_u32(vpaddlq_u16(vaddq_u16(s.hi, s.lo)));
@@ -286,7 +286,7 @@
}
SIMD_INLINE v128 v128_mulhi_s16(v128 a, v128 b) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_s16(vuzp2q_s16(
vreinterpretq_s16_s32(vmull_s16(vreinterpret_s16_s64(vget_low_s64(a)),
vreinterpret_s16_s64(vget_low_s64(b)))),
@@ -304,7 +304,7 @@
}
SIMD_INLINE v128 v128_madd_s16(v128 a, v128 b) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int32x4_t t1 = vmull_s16(vreinterpret_s16_s64(vget_low_s64(a)),
vreinterpret_s16_s64(vget_low_s64(b)));
int32x4_t t2 =
@@ -317,7 +317,7 @@
}
SIMD_INLINE v128 v128_madd_us8(v128 a, v128 b) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t t1 = vmulq_s16(
vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(vget_low_s64(a)))),
vmovl_s8(vreinterpret_s8_s64(vget_low_s64(b))));
@@ -369,7 +369,7 @@
SIMD_INLINE uint32_t v128_movemask_8(v128 a) {
a = vreinterpretq_s64_u8(vcltq_s8(vreinterpretq_s8_s64(a), vdupq_n_s8(0)));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x16_t m =
vandq_u8(vreinterpretq_u8_s64(a),
vreinterpretq_u8_u64(vdupq_n_u64(0x8040201008040201ULL)));
@@ -414,7 +414,7 @@
}
SIMD_INLINE v128 v128_ziplo_8(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u8(
vzip1q_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x)));
#else
@@ -424,7 +424,7 @@
}
SIMD_INLINE v128 v128_ziphi_8(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u8(
vzip2q_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x)));
#else
@@ -439,7 +439,7 @@
}
SIMD_INLINE v128 v128_ziplo_16(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u16(
vzip1q_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x)));
#else
@@ -449,7 +449,7 @@
}
SIMD_INLINE v128 v128_ziphi_16(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u16(
vzip2q_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x)));
#else
@@ -464,7 +464,7 @@
}
SIMD_INLINE v128 v128_ziplo_32(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u32(
vzip1q_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x)));
#else
@@ -474,7 +474,7 @@
}
SIMD_INLINE v128 v128_ziphi_32(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u32(
vzip2q_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x)));
#else
@@ -497,7 +497,7 @@
}
SIMD_INLINE v128 v128_unziplo_8(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u8(
vuzp1q_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x)));
#else
@@ -507,7 +507,7 @@
}
SIMD_INLINE v128 v128_unziphi_8(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u8(
vuzp2q_u8(vreinterpretq_u8_s64(y), vreinterpretq_u8_s64(x)));
#else
@@ -517,7 +517,7 @@
}
SIMD_INLINE v128 v128_unziplo_16(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u16(
vuzp1q_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x)));
#else
@@ -528,7 +528,7 @@
}
SIMD_INLINE v128 v128_unziphi_16(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u16(
vuzp2q_u16(vreinterpretq_u16_s64(y), vreinterpretq_u16_s64(x)));
#else
@@ -539,7 +539,7 @@
}
SIMD_INLINE v128 v128_unziplo_32(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u32(
vuzp1q_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x)));
#else
@@ -550,7 +550,7 @@
}
SIMD_INLINE v128 v128_unziphi_32(v128 x, v128 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u32(
vuzp2q_u32(vreinterpretq_u32_s64(y), vreinterpretq_u32_s64(x)));
#else
@@ -637,7 +637,7 @@
}
SIMD_INLINE v128 v128_shuffle_8(v128 x, v128 pattern) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpretq_s64_u8(
vqtbl1q_u8(vreinterpretq_u8_s64(x), vreinterpretq_u8_s64(pattern)));
#else
diff --git a/aom_dsp/simd/v256_intrinsics_v128.h b/aom_dsp/simd/v256_intrinsics_v128.h
index cf44965..3dfb325 100644
--- a/aom_dsp/simd/v256_intrinsics_v128.h
+++ b/aom_dsp/simd/v256_intrinsics_v128.h
@@ -614,7 +614,7 @@
SIMD_INLINE v256 v256_shuffle_8(v256 x, v256 pattern) {
#if HAVE_NEON
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x16x2_t p = { { vreinterpretq_u8_s64(x.val[0]),
vreinterpretq_u8_s64(x.val[1]) } };
return v256_from_v128(
@@ -653,7 +653,7 @@
SIMD_INLINE v256 v256_wideshuffle_8(v256 x, v256 y, v256 pattern) {
#if HAVE_NEON
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x16x4_t p = { {
vreinterpretq_u8_s64(y.val[0]),
vreinterpretq_u8_s64(y.val[1]),
diff --git a/aom_dsp/simd/v64_intrinsics_arm.h b/aom_dsp/simd/v64_intrinsics_arm.h
index 265ebed..35d88e2 100644
--- a/aom_dsp/simd/v64_intrinsics_arm.h
+++ b/aom_dsp/simd/v64_intrinsics_arm.h
@@ -130,7 +130,7 @@
int16x8_t t =
vmulq_s16(vmovl_s8(vreinterpret_s8_s64(x)),
vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_s64(y))));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_s16(t);
#else
int64x2_t r = vpaddlq_s32(vpaddlq_s16(t));
@@ -139,7 +139,7 @@
}
SIMD_INLINE int64_t v64_dotp_s16(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_s32(
vmull_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
#else
@@ -150,7 +150,7 @@
}
SIMD_INLINE uint64_t v64_hadd_u8(v64 x) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlv_u8(vreinterpret_u8_s64(x));
#else
return vget_lane_u64(
@@ -173,7 +173,7 @@
}
SIMD_INLINE uint32_t v64_sad_u8_sum(sad64_internal s) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_u16(s);
#else
uint64x2_t r = vpaddlq_u32(vpaddlq_u16(s));
@@ -194,7 +194,7 @@
}
SIMD_INLINE uint32_t v64_ssd_u8_sum(ssd64_internal s) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddvq_u32(s);
#else
uint64x2_t t = vpaddlq_u32(s);
@@ -290,7 +290,7 @@
}
SIMD_INLINE v64 v64_mulhi_s16(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t t = vreinterpretq_s16_s32(
vmull_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
return vget_low_s64(vreinterpretq_s64_s16(vuzp2q_s16(t, t)));
@@ -370,7 +370,7 @@
}
SIMD_INLINE v64 v64_ziplo_8(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u8(
vzip1_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x)));
#else
@@ -380,7 +380,7 @@
}
SIMD_INLINE v64 v64_ziphi_8(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u8(
vzip2_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x)));
#else
@@ -390,7 +390,7 @@
}
SIMD_INLINE v64 v64_ziplo_16(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u16(
vzip1_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)));
#else
@@ -400,7 +400,7 @@
}
SIMD_INLINE v64 v64_ziphi_16(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u16(
vzip2_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)));
#else
@@ -410,7 +410,7 @@
}
SIMD_INLINE v64 v64_ziplo_32(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u32(
vzip1_u32(vreinterpret_u32_s64(y), vreinterpret_u32_s64(x)));
#else
@@ -420,7 +420,7 @@
}
SIMD_INLINE v64 v64_ziphi_32(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u32(
vzip2_u32(vreinterpret_u32_s64(y), vreinterpret_u32_s64(x)));
#else
@@ -466,7 +466,7 @@
}
SIMD_INLINE v64 v64_unziplo_8(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u8(
vuzp1_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x)));
#else
@@ -476,7 +476,7 @@
}
SIMD_INLINE v64 v64_unziphi_8(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u8(
vuzp2_u8(vreinterpret_u8_s64(y), vreinterpret_u8_s64(x)));
#else
@@ -486,7 +486,7 @@
}
SIMD_INLINE v64 v64_unziplo_16(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u16(
vuzp1_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)));
#else
@@ -496,7 +496,7 @@
}
SIMD_INLINE v64 v64_unziphi_16(v64 x, v64 y) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vreinterpret_s64_u16(
vuzp2_u16(vreinterpret_u16_s64(y), vreinterpret_u16_s64(x)));
#else
diff --git a/aom_ports/arm_cpudetect.c b/aom_ports/arm_cpudetect.c
index e1981c4..276ef61 100644
--- a/aom_ports/arm_cpudetect.c
+++ b/aom_ports/arm_cpudetect.c
@@ -57,16 +57,14 @@
}
#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT || __APPLE__ */
-#if HAVE_NEON && \
- !(defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC))
+#if HAVE_NEON && !AOM_ARCH_AARCH64
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
#undef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#undef WIN32_EXTRA_LEAN
#define WIN32_EXTRA_LEAN
#include <windows.h>
-#endif // HAVE_NEON &&
- // !(defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC))
+#endif // HAVE_NEON && !AOM_ARCH_AARCH64
int aom_arm_cpu_caps(void) {
int flags;
@@ -75,7 +73,7 @@
return flags;
}
mask = arm_cpu_env_mask();
-#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+#if AOM_ARCH_AARCH64
return HAS_NEON & mask;
#else
/* MSVC has no inline __asm support for ARM, but it does let you __emit
@@ -94,7 +92,7 @@
}
#endif /* HAVE_NEON */
return flags & mask;
-#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+#endif // AOM_ARCH_AARCH64
}
#elif defined(__ANDROID__) /* end _MSC_VER */
diff --git a/av1/common/arm/cfl_neon.c b/av1/common/arm/cfl_neon.c
index 8c15345..59c1b92 100644
--- a/av1/common/arm/cfl_neon.c
+++ b/av1/common/arm/cfl_neon.c
@@ -132,7 +132,7 @@
}
#if CONFIG_AV1_HIGHBITDEPTH
-#ifndef __aarch64__
+#if !AOM_ARCH_AARCH64
uint16x8_t vpaddq_u16(uint16x8_t a, uint16x8_t b) {
return vcombine_u16(vpadd_u16(vget_low_u16(a), vget_high_u16(a)),
vpadd_u16(vget_low_u16(b), vget_high_u16(b)));
@@ -313,7 +313,7 @@
// Permute and add in such a way that each lane contains the block sum.
// [A+C+B+D, B+D+A+C, C+A+D+B, D+B+C+A]
-#ifdef __aarch64__
+#if AOM_ARCH_AARCH64
sum_32x4 = vpaddq_u32(sum_32x4, sum_32x4);
sum_32x4 = vpaddq_u32(sum_32x4, sum_32x4);
#else
diff --git a/av1/common/arm/convolve_neon.c b/av1/common/arm/convolve_neon.c
index ec9008e..ed47b17 100644
--- a/av1/common/arm/convolve_neon.c
+++ b/av1/common/arm/convolve_neon.c
@@ -44,7 +44,7 @@
return sum;
}
-#if !defined(__aarch64__)
+#if !AOM_ARCH_AARCH64
static INLINE uint8x8_t convolve8_x_4x1(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,
const int16x4_t s4, const int16x4_t s5,
@@ -67,9 +67,9 @@
// We halved the convolution filter values so - 1 from the right shift.
return vqrshrun_n_s16(vcombine_s16(sum, vdup_n_s16(0)), FILTER_BITS - 1);
}
-#endif // !defined(__arch64__)
+#endif // !AOM_ARCH_AARCH64
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE int32x4_t convolve12_4_usdot(uint8x16_t samples,
const int8x16_t filters,
@@ -126,7 +126,7 @@
vqrshrn_n_s32(sum[1], FILTER_BITS));
}
-#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#elif AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE int16x4_t convolve12_horiz_4_sdot(
uint8x16_t samples, const int8x16_t filters, const int32x4_t correction,
@@ -257,7 +257,7 @@
vqrshrn_n_s32(sum[1], FILTER_BITS));
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE uint8x8_t convolve8_vert_8x4(
const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
@@ -334,7 +334,7 @@
return vqmovun_s16(res);
}
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
void convolve_x_sr_12tap_neon(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
@@ -572,7 +572,7 @@
}
}
-#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#elif AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
void convolve_x_sr_12tap_neon(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
@@ -830,7 +830,7 @@
}
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
static INLINE uint8x8_t
convolve8_horiz_8x8(const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
@@ -951,7 +951,7 @@
const int16x8_t x_filter_0_7 = vld1q_s16(x_filter_ptr);
const int16x4_t x_filter_8_11 = vld1_s16(x_filter_ptr + 8);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
// This shim of 1 << (ROUND0_BITS - 1) enables us to use a single
// rounding right shift by FILTER_BITS - instead of a first rounding right
// shift by ROUND0_BITS, followed by second rounding right shift by
@@ -1058,10 +1058,10 @@
x_filter_12tap_w4_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w, h,
x_filter_0_7, x_filter_8_11);
}
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
x_filter_12tap_w4_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w, h,
x_filter_0_7, x_filter_8_11);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
}
void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
@@ -1083,7 +1083,7 @@
}
uint8x8_t t0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x8_t t1, t2, t3;
// This shim of 1 << ((ROUND0_BITS - 1) - 1) enables us to use a single
// rounding right shift by FILTER_BITS - instead of a first rounding right
@@ -1091,11 +1091,11 @@
// FILTER_BITS - ROUND0_BITS.
// The outermost -1 is needed because we halved the filter values.
const int16x8_t horiz_const = vdupq_n_s16(1 << ((ROUND0_BITS - 1) - 1));
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Filter values are even so downshift by 1 to reduce precision requirements.
const int16x8_t x_filter = vshrq_n_s16(vld1q_s16(x_filter_ptr), 1);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
if (h == 4) {
uint8x8_t d01, d23;
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
@@ -1172,18 +1172,18 @@
w -= 4;
} while (w > 0);
} else {
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
int width;
const uint8_t *s;
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s8, s9, s10;
uint8x8_t t4, t5, t6, t7;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
if (w <= 4) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
do {
load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
@@ -1260,7 +1260,7 @@
dst += 8 * dst_stride;
h -= 8;
} while (h > 0);
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
// This shim of 1 << ((ROUND0_BITS - 1) - 1) enables us to use a single
// rounding right shift by FILTER_BITS - instead of a first rounding right
// shift by ROUND0_BITS, followed by second rounding right shift by
@@ -1301,11 +1301,11 @@
}
h -= 1;
} while (h > 0);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} else {
uint8_t *d;
int16x8_t s11;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s12, s13, s14;
do {
__builtin_prefetch(src + 0 * src_stride);
@@ -1390,7 +1390,7 @@
dst += 8 * dst_stride;
h -= 8;
} while (h > 0);
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
// This shim of 1 << ((ROUND0_BITS - 1) - 1) enables us to use a single
// rounding right shift by FILTER_BITS - instead of a first rounding right
// shift by ROUND0_BITS, followed by second rounding right shift by
@@ -1434,14 +1434,14 @@
dst += dst_stride;
h -= 1;
} while (h > 0);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
}
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE void convolve_y_sr_6tap_neon(const uint8_t *src_ptr,
int src_stride, uint8_t *dst_ptr,
@@ -1452,11 +1452,11 @@
int16x4_t s0, s1, s2, s3, s4, s5, d0;
uint8x8_t d01;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x8_t t6, t7, t8;
int16x4_t s6, s7, s8, d1, d2, d3;
uint8x8_t d23;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
const uint8_t *s = src_ptr + src_stride;
uint8_t *d = dst_ptr;
@@ -1470,7 +1470,7 @@
s += 5 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_u8_8x4(s, src_stride, &t5, &t6, &t7, &t8);
s5 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t5)));
s6 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t6)));
@@ -1509,7 +1509,7 @@
s += 4 * src_stride;
d += 4 * dst_stride;
h -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
t5 = vld1_u8(s);
s5 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t5)));
@@ -1530,18 +1530,18 @@
s += src_stride;
d += dst_stride;
h--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (h > 0);
} else {
// if width is a multiple of 8 & height is a multiple of 4
uint8x8_t t0, t1, t2, t3, t4, t5;
int16x8_t s0, s1, s2, s3, s4, s5, dd0;
uint8x8_t d0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x8_t t6, t7, t8;
int16x8_t s6, s7, s8, dd1, dd2, dd3;
uint8x8_t d1, d2, d3;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
int height = h;
@@ -1557,7 +1557,7 @@
s += 5 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_u8_8x4(s, src_stride, &t5, &t6, &t7, &t8);
s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
@@ -1588,7 +1588,7 @@
s += 4 * src_stride;
d += 4 * dst_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
t5 = vld1_u8(s);
s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
@@ -1605,7 +1605,7 @@
s += src_stride;
d += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height > 0);
src_ptr += 8;
@@ -1891,10 +1891,10 @@
if (w <= 4) {
uint8x8_t d01;
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x8_t d23;
int16x4_t s8, s9, s10, d1, d2, d3;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
s0 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
src += src_stride;
s1 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
@@ -1913,7 +1913,7 @@
do {
s7 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
src += src_stride;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
s8 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
src += src_stride;
s9 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src))));
@@ -1962,7 +1962,7 @@
s6 = s10;
dst += 4 * dst_stride;
h -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
__builtin_prefetch(dst + 0 * dst_stride);
__builtin_prefetch(src + 0 * src_stride);
@@ -1984,7 +1984,7 @@
s6 = s7;
dst += dst_stride;
h -= 1;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (h > 0);
} else {
int height;
@@ -1992,10 +1992,10 @@
uint8_t *d;
uint8x8_t t0;
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x8_t t1, t2, t3;
int16x8_t s8, s9, s10;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
__builtin_prefetch(src + 0 * src_stride);
__builtin_prefetch(src + 1 * src_stride);
@@ -2025,7 +2025,7 @@
do {
s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
s += src_stride;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
s8 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
s += src_stride;
s9 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
@@ -2060,7 +2060,7 @@
s6 = s10;
d += 4 * dst_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
__builtin_prefetch(d);
__builtin_prefetch(s);
@@ -2077,7 +2077,7 @@
s5 = s6;
s6 = s7;
height -= 1;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height > 0);
src += 8;
dst += 8;
@@ -2086,7 +2086,7 @@
}
}
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE int16x4_t convolve12_horiz_4_usdot(uint8x16_t samples,
const int8x16_t filters,
@@ -2312,7 +2312,7 @@
}
}
-#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#elif AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE void convolve_2d_sr_horiz_12tap_neon(
const uint8_t *src_ptr, int src_stride, int16_t *dst_ptr,
@@ -2495,7 +2495,7 @@
}
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
static INLINE int16x4_t convolve12_horiz_4x4_s16(
const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
@@ -2591,7 +2591,7 @@
const int32x4_t horiz_const =
vdupq_n_s32((1 << (bd + FILTER_BITS - 1)) + (1 << (ROUND0_BITS - 1)));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
do {
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
uint8x8_t t0, t1, t2, t3;
@@ -2683,15 +2683,15 @@
w, h, x_filter_0_7, x_filter_8_11,
horiz_const);
}
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
horiz_filter_12tap_w4_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w,
h, x_filter_0_7, x_filter_8_11, horiz_const);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE void convolve_2d_sr_horiz_8tap_neon(
const uint8_t *src, int src_stride, int16_t *im_block, int im_stride, int w,
@@ -2827,7 +2827,7 @@
}
}
-#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#elif AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE void convolve_2d_sr_horiz_8tap_neon(
const uint8_t *src, int src_stride, int16_t *im_block, int im_stride, int w,
@@ -2975,7 +2975,7 @@
}
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
// Horizontal filtering for convolve_2d_sr for width multiple of 8
// Processes one row at a time
@@ -3094,7 +3094,7 @@
const int16x4_t horiz_const = vdup_n_s16((1 << (bd + FILTER_BITS - 2)) +
(1 << ((ROUND0_BITS - 1) - 1)));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
do {
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
uint8x8_t t0, t1, t2, t3;
@@ -3154,10 +3154,10 @@
height, x_filter, horiz_const);
}
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
horiz_filter_w4_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w,
height, x_filter, horiz_const);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} else {
// This shim of 1 << ((ROUND0_BITS - 1) - 1) enables us to use non-rounding
@@ -3166,7 +3166,7 @@
const int16x8_t horiz_const = vdupq_n_s16((1 << (bd + FILTER_BITS - 2)) +
(1 << ((ROUND0_BITS - 1) - 1)));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
for (; height >= 8; height -= 8) {
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14,
@@ -3325,14 +3325,14 @@
height, x_filter, horiz_const);
}
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w,
height, x_filter, horiz_const);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
}
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE int32x4_t convolve12_vert_4_s32(
const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
@@ -3545,10 +3545,10 @@
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
uint8x8_t d01;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x4_t s8, s9, s10, d1, d2, d3;
uint8x8_t d23;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
int16_t *s = src_ptr;
uint8_t *d = dst_ptr;
@@ -3557,7 +3557,7 @@
s += 7 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_s16_4x4(s, src_stride, &s7, &s8, &s9, &s10);
d0 = convolve8_vert_4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter);
@@ -3594,7 +3594,7 @@
s += 4 * src_stride;
d += 4 * dst_stride;
h -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s7 = vld1_s16(s);
s += src_stride;
@@ -3617,16 +3617,16 @@
s6 = s7;
d += dst_stride;
h--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (h > 0);
} else {
// if width is a multiple of 8 & height is a multiple of 4
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
uint8x8_t d0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s8, s9, s10;
uint8x8_t d1, d2, d3;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
int height = h;
@@ -3637,7 +3637,7 @@
s += 7 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_s16_8x4(s, src_stride, &s7, &s8, &s9, &s10);
d0 = convolve8_vert_8_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
@@ -3665,7 +3665,7 @@
s += 4 * src_stride;
d += 4 * dst_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s7 = vld1q_s16(s);
d0 = convolve8_vert_8_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
@@ -3683,7 +3683,7 @@
s += src_stride;
d += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height > 0);
src_ptr += 8;
@@ -3753,10 +3753,10 @@
int16x4_t s0, s1, s2, s3, s4, s5, d0;
uint8x8_t d01;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x4_t s6, s7, s8, d1, d2, d3;
uint8x8_t d23;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
int16_t *s = src_ptr;
uint8_t *d = dst_ptr;
@@ -3765,7 +3765,7 @@
s += 5 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_s16_4x4(s, src_stride, &s5, &s6, &s7, &s8);
d0 = convolve6_vert_4_s32(s0, s1, s2, s3, s4, s5, y_filter);
@@ -3800,7 +3800,7 @@
s += 4 * src_stride;
d += 4 * dst_stride;
h -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s5 = vld1_s16(s);
d0 = convolve6_vert_4_s32(s0, s1, s2, s3, s4, s5, y_filter);
@@ -3820,16 +3820,16 @@
s += src_stride;
d += dst_stride;
h--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (h > 0);
} else {
// if width is a multiple of 8 & height is a multiple of 4
int16x8_t s0, s1, s2, s3, s4, s5;
uint8x8_t d0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s6, s7, s8;
uint8x8_t d1, d2, d3;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
int height = h;
@@ -3840,7 +3840,7 @@
s += 5 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_s16_8x4(s, src_stride, &s5, &s6, &s7, &s8);
d0 = convolve6_vert_8_s32(s0, s1, s2, s3, s4, s5, y_filter, sub_const);
@@ -3862,7 +3862,7 @@
s += 4 * src_stride;
d += 4 * dst_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s5 = vld1q_s16(s);
d0 = convolve6_vert_8_s32(s0, s1, s2, s3, s4, s5, y_filter, sub_const);
@@ -3877,7 +3877,7 @@
s += src_stride;
d += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height > 0);
src_ptr += 8;
diff --git a/av1/common/arm/convolve_neon.h b/av1/common/arm/convolve_neon.h
index cbca376..528712c 100644
--- a/av1/common/arm/convolve_neon.h
+++ b/av1/common/arm/convolve_neon.h
@@ -232,7 +232,7 @@
// clang versions < 16 did not include the dotprod feature for Arm architecture
// versions that should have it by default, e.g., armv8.6-a.
-#if defined(__aarch64__) && \
+#if AOM_ARCH_AARCH64 && \
(defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8))
DECLARE_ALIGNED(16, static const uint8_t, dot_prod_permute_tbl[48]) = {
@@ -241,9 +241,9 @@
8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14
};
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif  // AOM_ARCH_AARCH64 &&
+        // (defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8))
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE int16x8_t convolve8_x_8_usdot(uint8x16_t samples,
const int8x8_t filters,
@@ -319,7 +319,7 @@
return sum;
}
-#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#elif AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE int16x8_t convolve8_horiz_8_sdot(uint8x16_t samples,
const int8x8_t filters,
@@ -444,7 +444,7 @@
return vcombine_s16(vmovn_s32(sum[0]), vmovn_s32(sum[1]));
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE int16x4_t convolve8_4x4_s16(
const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
@@ -508,7 +508,7 @@
return sum;
}
-#if !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#if !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
static INLINE int16x4_t convolve8_horiz_4x4_s16(
const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
@@ -556,6 +556,6 @@
return vshrq_n_s16(sum, ROUND0_BITS - 1);
}
-#endif // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#endif // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
#endif // AOM_AV1_COMMON_ARM_CONVOLVE_NEON_H_
diff --git a/av1/common/arm/highbd_convolve_neon.c b/av1/common/arm/highbd_convolve_neon.c
index d7def2b..979aff4 100644
--- a/av1/common/arm/highbd_convolve_neon.c
+++ b/av1/common/arm/highbd_convolve_neon.c
@@ -1156,7 +1156,7 @@
// 2
const uint32x4_t src_idx_u32 =
vshlq_n_u32(vshrq_n_u32(xqn_idx, SCALE_SUBPEL_BITS), 1);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint64x2_t src4[2];
src4[0] = vaddw_u32(vdupq_n_u64((const uint64_t)src_ptr),
vget_low_u32(src_idx_u32));
@@ -1172,7 +1172,7 @@
int16_t *src4_ptr[4];
uint32_t *tmp_ptr = (uint32_t *)&src4_ptr;
vst1q_u32(tmp_ptr, src4);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Same for the filter vectors
const int32x4_t filter_idx_s32 = vreinterpretq_s32_u32(
vshrq_n_u32(vandq_u32(xqn_idx, subpel_mask), SCALE_EXTRA_BITS));
@@ -1253,7 +1253,7 @@
// = 2
const uint32x4_t src_idx_u32 =
vshlq_n_u32(vshrq_n_u32(xqn_idx, SCALE_SUBPEL_BITS), 1);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint64x2_t src4[2];
src4[0] = vaddw_u32(vdupq_n_u64((const uint64_t)s),
vget_low_u32(src_idx_u32));
@@ -1269,7 +1269,7 @@
int16_t *src4_ptr[4];
uint32_t *tmp_ptr = (uint32_t *)&src4_ptr;
vst1q_u32(tmp_ptr, src4);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Same for the filter vectors
const int32x4_t filter_idx_s32 = vreinterpretq_s32_u32(
vshrq_n_u32(vandq_u32(xqn_idx, subpel_mask), SCALE_EXTRA_BITS));
@@ -2178,7 +2178,7 @@
// negative offsets. Argon test
// profile0_core/streams/test10573_11003.obu was failing because of
// this.
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint64x2_t tmp4[2];
tmp4[0] = vreinterpretq_u64_s64(vaddw_s32(
vdupq_n_s64((const int64_t)src_ptr), vget_low_s32(src_idx)));
@@ -2216,7 +2216,7 @@
const int16_t *x_filter4_ptr[4];
tmp_ptr = (uint32_t *)&x_filter4_ptr;
vst1q_u32(tmp_ptr, tmp4);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Load source
s0 = vld1q_s16(src4_ptr[0]);
s1 = vld1q_s16(src4_ptr[1]);
@@ -2296,7 +2296,7 @@
// negative offsets. Argon test
// profile0_core/streams/test10573_11003.obu was failing because of
// this.
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint64x2_t tmp4[2];
tmp4[0] = vreinterpretq_u64_s64(
vaddw_s32(vdupq_n_s64((const int64_t)s), vget_low_s32(src_idx)));
@@ -2334,7 +2334,7 @@
const int16_t *x_filter4_ptr[4];
tmp_ptr = (uint32_t *)&x_filter4_ptr;
vst1q_u32(tmp_ptr, tmp4);
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// Load source
s0 = vld1q_s16(src4_ptr[0]);
diff --git a/av1/common/arm/highbd_inv_txfm_neon.c b/av1/common/arm/highbd_inv_txfm_neon.c
index 96b9738..d197fca 100644
--- a/av1/common/arm/highbd_inv_txfm_neon.c
+++ b/av1/common/arm/highbd_inv_txfm_neon.c
@@ -17,7 +17,7 @@
#include "config/aom_config.h"
#include "config/av1_rtcd.h"
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
#define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \
do { \
int32x4x2_t swap_low = vtrnq_s32(x0, x1); \
@@ -49,7 +49,7 @@
y3 = vextq_s32(swap_low.val[1], \
vextq_s32(swap_high.val[1], swap_high.val[1], 2), 2); \
} while (0)
-#endif // (__aarch64__)
+#endif // AOM_ARCH_AARCH64
static INLINE void transpose_4x4(const int32x4_t *in, int32x4_t *out) {
TRANSPOSE_4X4(in[0], in[1], in[2], in[3], out[0], out[1], out[2], out[3]);
@@ -644,12 +644,12 @@
vreinterpretq_s16_s32(u0x.val[1]), vreinterpretq_s16_s32(zero), 1));
u0x = vzipq_s32(u0x.val[0], u0x.val[1]);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
u0 = vreinterpretq_s32_s64(vzip1q_s64(vreinterpretq_s64_s32(u0x.val[0]),
vreinterpretq_s64_s32(u0x.val[1])));
#else
u0 = vcombine_s32(vget_low_s32(u0x.val[0]), vget_low_s32(u0x.val[1]));
-#endif // (__aarch64__)
+#endif // AOM_ARCH_AARCH64
// u1
int32x4x2_t u1x;
u1x.val[0] = vreinterpretq_s32_s64(
@@ -669,12 +669,12 @@
vreinterpretq_s16_s32(u1x.val[1]), vreinterpretq_s16_s32(zero), 1));
u1x = vzipq_s32(u1x.val[0], u1x.val[1]);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
u1 = vreinterpretq_s32_s64(vzip1q_s64(vreinterpretq_s64_s32(u1x.val[0]),
vreinterpretq_s64_s32(u1x.val[1])));
#else
u1 = vcombine_s32(vget_low_s32(u1x.val[0]), vget_low_s32(u1x.val[1]));
-#endif // (__aarch64__)
+#endif // AOM_ARCH_AARCH64
// u2
int32x4x2_t u2x;
@@ -695,12 +695,12 @@
vreinterpretq_s16_s32(u2x.val[1]), vreinterpretq_s16_s32(zero), 1));
u2x = vzipq_s32(u2x.val[0], u2x.val[1]);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
u2 = vreinterpretq_s32_s64(vzip1q_s64(vreinterpretq_s64_s32(u2x.val[0]),
vreinterpretq_s64_s32(u2x.val[1])));
#else
u2 = vcombine_s32(vget_low_s32(u2x.val[0]), vget_low_s32(u2x.val[1]));
-#endif // (__aarch64__)
+#endif // AOM_ARCH_AARCH64
// u3
int32x4x2_t u3x;
@@ -721,12 +721,12 @@
vreinterpretq_s16_s32(u3x.val[1]), vreinterpretq_s16_s32(zero), 1));
u3x = vzipq_s32(u3x.val[0], u3x.val[1]);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
u3 = vreinterpretq_s32_s64(vzip1q_s64(vreinterpretq_s64_s32(u3x.val[0]),
vreinterpretq_s64_s32(u3x.val[1])));
#else
u3 = vcombine_s32(vget_low_s32(u3x.val[0]), vget_low_s32(u3x.val[1]));
-#endif // (__aarch64__)
+#endif // AOM_ARCH_AARCH64
out[0] = u0;
out[1] = u1;
@@ -809,7 +809,7 @@
vshrq_n_s64(vreinterpretq_s64_s32(a0.val[1]), NewSqrt2Bits));
a0 = vzipq_s32(a0.val[0], a0.val[1]);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
out[i] = vreinterpretq_s32_s64(vzip1q_s64(
vreinterpretq_s64_s32(a0.val[0]), vreinterpretq_s64_s32(a0.val[1])));
#else
@@ -2824,7 +2824,7 @@
a0.val[1] = vreinterpretq_s32_s64(
vshrq_n_s64(vreinterpretq_s64_s32(a0.val[1]), NewSqrt2Bits));
a0 = vzipq_s32(a0.val[0], a0.val[1]);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
out[i] = vreinterpretq_s32_s64(vzip1q_s64(
vreinterpretq_s64_s32(a0.val[0]), vreinterpretq_s64_s32(a0.val[1])));
#else
diff --git a/av1/common/arm/jnt_convolve_neon.c b/av1/common/arm/jnt_convolve_neon.c
index 3cb2567..018b2ce 100644
--- a/av1/common/arm/jnt_convolve_neon.c
+++ b/av1/common/arm/jnt_convolve_neon.c
@@ -22,7 +22,7 @@
#include "av1/common/common.h"
#include "av1/common/arm/convolve_neon.h"
-#if !defined(__aarch64__)
+#if !AOM_ARCH_AARCH64
static INLINE void compute_avg_4x1(uint16x4_t dd0, uint16x4_t d0,
const uint16_t fwd_offset,
const uint16_t bck_offset,
@@ -74,7 +74,7 @@
*d0_u8 = vqrshrun_n_s16(dst0, FILTER_BITS - ROUND0_BITS);
}
-#endif // !defined(__arch64__)
+#endif // !AOM_ARCH_AARCH64
static INLINE void compute_avg_4x4(uint16x4_t dd0, uint16x4_t dd1,
uint16x4_t dd2, uint16x4_t dd3,
@@ -177,7 +177,7 @@
*d3_u8 = vqrshrun_n_s16(dst3, FILTER_BITS - ROUND0_BITS);
}
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE int16x4_t convolve8_4_2d_h(uint8x16_t samples,
const int8x8_t x_filter,
@@ -295,7 +295,7 @@
}
}
-#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#elif AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE int16x4_t convolve8_4_2d_h(uint8x16_t samples,
const int8x8_t x_filter,
@@ -431,7 +431,7 @@
}
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
static INLINE int16x4_t convolve8_4_2d_h(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,
@@ -492,10 +492,10 @@
if (w == 4) {
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
uint8x8_t t0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x4_t s8, s9, s10, d1, d2, d3;
uint8x8_t t1, t2, t3;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// A shim of 1 << ((ROUND0_BITS - 1) - 1) enables us to use non-rounding
// shifts - which are generally faster than rounding shifts on modern CPUs.
@@ -504,7 +504,7 @@
(1 << ((ROUND0_BITS - 1) - 1)));
do {
__builtin_prefetch(src_ptr + 0 * src_stride);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
__builtin_prefetch(src_ptr + 1 * src_stride);
__builtin_prefetch(src_ptr + 2 * src_stride);
__builtin_prefetch(src_ptr + 3 * src_stride);
@@ -548,7 +548,7 @@
src_ptr += 4 * src_stride;
dst_ptr += 4 * dst_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
t0 = vld1_u8(src_ptr); // a0 a1 a2 a3 a4 a5 a6 a7
s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); // a0 a1 a2 a3
s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); // a4 a5 a6 a7
@@ -572,16 +572,16 @@
src_ptr += src_stride;
dst_ptr += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height > 0);
} else {
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, d0;
uint8x8_t t0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s9, s10, s11, s12, s13, s14;
int16x8_t d1, d2, d3, d4, d5, d6, d7;
uint8x8_t t1, t2, t3, t4, t5, t6, t7;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
// A shim of 1 << ((ROUND0_BITS - 1) - 1) enables us to use non-rounding
// shifts - which are generally faster than rounding shifts on modern CPUs.
@@ -593,7 +593,7 @@
int16_t *d = dst_ptr;
int width = w;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
__builtin_prefetch(src_ptr + 0 * src_stride);
__builtin_prefetch(src_ptr + 1 * src_stride);
__builtin_prefetch(src_ptr + 2 * src_stride);
@@ -672,7 +672,7 @@
src_ptr += 8 * src_stride;
dst_ptr += 8 * dst_stride;
height -= 8;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
t0 = vld1_u8(src_ptr);
s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7
@@ -703,12 +703,12 @@
src_ptr += src_stride;
dst_ptr += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height > 0);
}
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE uint16x4_t
convolve6_4_2d_v(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
@@ -779,17 +779,17 @@
int16x4_t s0, s1, s2, s3, s4, s5;
uint16x4_t dd0, d0;
uint8x8_t d01_u8;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x4_t s6, s7, s8;
uint16x4_t dd1, dd2, dd3, d1, d2, d3;
uint8x8_t d23_u8;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
load_s16_4x5(src_ptr, src_stride, &s0, &s1, &s2, &s3, &s4);
src_ptr += 5 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_s16_4x4(src_ptr, src_stride, &s5, &s6, &s7, &s8);
d0 = convolve6_4_2d_v(s0, s1, s2, s3, s4, s5, y_filter, offset_const);
@@ -821,7 +821,7 @@
src_ptr += 4 * src_stride;
dst_ptr += 4 * dst_stride;
h -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s5 = vld1_s16(src_ptr);
d0 = convolve6_4_2d_v(s0, s1, s2, s3, s4, s5, y_filter, offset_const);
@@ -847,17 +847,17 @@
src_ptr += src_stride;
dst_ptr += dst_stride;
h--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (h != 0);
} else {
int16x8_t s0, s1, s2, s3, s4, s5;
uint16x8_t dd0, d0;
uint8x8_t d0_u8;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s6, s7, s8;
uint16x8_t dd1, dd2, dd3, d1, d2, d3;
uint8x8_t d1_u8, d2_u8, d3_u8;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
int16_t *s = src_ptr;
@@ -869,7 +869,7 @@
s += 5 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_s16_8x4(s, src_stride, &s5, &s6, &s7, &s8);
d0 = convolve6_8_2d_v(s0, s1, s2, s3, s4, s5, y_filter, offset_const);
@@ -898,7 +898,7 @@
s += 4 * src_stride;
d += 4 * dst_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s5 = vld1q_s16(s);
d0 = convolve6_8_2d_v(s0, s1, s2, s3, s4, s5, y_filter, offset_const);
@@ -923,7 +923,7 @@
s += src_stride;
d += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height != 0);
src_ptr += 8;
dst_ptr += 8;
@@ -1008,17 +1008,17 @@
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7;
uint16x4_t dd0, d0;
uint8x8_t d01_u8;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x4_t s8, s9, s10;
uint16x4_t dd1, dd2, dd3, d1, d2, d3;
uint8x8_t d23_u8;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
load_s16_4x7(src_ptr, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6);
src_ptr += 7 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_s16_4x4(src_ptr, src_stride, &s7, &s8, &s9, &s10);
d0 = convolve8_4_2d_v(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
@@ -1056,7 +1056,7 @@
src_ptr += 4 * src_stride;
dst_ptr += 4 * dst_stride;
h -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s7 = vld1_s16(src_ptr);
d0 = convolve8_4_2d_v(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
@@ -1085,17 +1085,17 @@
src_ptr += src_stride;
dst_ptr += dst_stride;
h--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (h != 0);
} else {
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
uint16x8_t dd0, d0;
uint8x8_t d0_u8;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s8, s9, s10;
uint16x8_t dd1, dd2, dd3, d1, d2, d3;
uint8x8_t d1_u8, d2_u8, d3_u8;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
int16_t *s = src_ptr;
@@ -1107,7 +1107,7 @@
s += 7 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_s16_8x4(s, src_stride, &s7, &s8, &s9, &s10);
d0 = convolve8_8_2d_v(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
@@ -1142,7 +1142,7 @@
s += 4 * src_stride;
d += 4 * dst_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s7 = vld1q_s16(s);
d0 = convolve8_8_2d_v(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
@@ -1170,7 +1170,7 @@
s += src_stride;
d += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height != 0);
src_ptr += 8;
dst_ptr += 8;
@@ -1321,7 +1321,7 @@
}
}
-#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_MATMUL_INT8)
static INLINE uint16x4_t convolve8_4_x(uint8x16_t samples,
const int8x8_t x_filter,
@@ -1492,7 +1492,7 @@
}
}
-#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#elif AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE uint16x4_t convolve8_4_x(uint8x16_t samples,
const int8x8_t x_filter,
@@ -1678,7 +1678,7 @@
}
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
static INLINE uint16x4_t convolve8_4_x(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,
@@ -1764,19 +1764,19 @@
int height = h;
uint8x8_t t0;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
uint8x8_t t1, t2, t3, t4, t5, t6, t7;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
if ((w == 4) || (h == 4)) {
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
uint16x4_t d0, dd0;
uint8x8_t d01;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x4_t s9, s10;
uint16x4_t d1, d2, d3, dd1, dd2, dd3;
uint8x8_t d23;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
d = dst_ptr;
@@ -1784,7 +1784,7 @@
width = w;
__builtin_prefetch(src_ptr + 0 * src_stride);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
__builtin_prefetch(src_ptr + 1 * src_stride);
__builtin_prefetch(src_ptr + 2 * src_stride);
__builtin_prefetch(src_ptr + 3 * src_stride);
@@ -1868,7 +1868,7 @@
dst_ptr += 4 * dst_stride;
dst8_ptr += 4 * dst8_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
t0 = vld1_u8(src_ptr); // a0 a1 a2 a3 a4 a5 a6 a7
s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
@@ -1917,7 +1917,7 @@
dst_ptr += dst_stride;
dst8_ptr += dst8_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height != 0);
} else {
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
@@ -1929,7 +1929,7 @@
d_u8 = dst8_ptr;
width = w;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s9, s10, s11, s12, s13, s14;
uint16x8_t d1, d2, d3, d4, d5, d6, d7, dd1, dd2, dd3, dd4, dd5, dd6, dd7;
uint8x8_t d1_u8, d2_u8, d3_u8, d4_u8, d5_u8, d6_u8, d7_u8;
@@ -2034,7 +2034,7 @@
dst_ptr += 8 * dst_stride;
dst8_ptr += 8 * dst8_stride;
height -= 8;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
__builtin_prefetch(src_ptr);
t0 = vld1_u8(src_ptr);
@@ -2080,12 +2080,12 @@
dst_ptr += dst_stride;
dst8_ptr += dst8_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height != 0);
}
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
static INLINE uint16x4_t convolve6_4_y(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,
@@ -2152,11 +2152,11 @@
int16x4_t s0, s1, s2, s3, s4, s5;
uint16x4_t d0, dd0;
uint8x8_t t0, t1, t2, t3, t4, d01;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x4_t s6, s7, s8;
uint16x4_t d1, d2, d3, dd1, dd2, dd3;
uint8x8_t d23;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
const uint8_t *s = src_ptr;
@@ -2179,7 +2179,7 @@
s += 5 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
t0 = load_unaligned_u8_4x1(s + 0 * src_stride);
t1 = load_unaligned_u8_4x1(s + 1 * src_stride);
t2 = load_unaligned_u8_4x1(s + 2 * src_stride);
@@ -2223,7 +2223,7 @@
d += 4 * dst_stride;
d_u8 += 4 * dst8_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
t0 = load_unaligned_u8_4x1(s);
s5 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(t0)));
@@ -2251,7 +2251,7 @@
d += dst_stride;
d_u8 += dst8_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height != 0);
src_ptr += 4;
dst_ptr += 4;
@@ -2262,11 +2262,11 @@
int16x8_t s0, s1, s2, s3, s4, s5;
uint16x8_t d0, dd0;
uint8x8_t d0_u8, t0, t1, t2, t3, t4;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s6, s7, s8, s9, s10, s11, s12;
uint16x8_t d1, d2, d3, d4, d5, d6, d7, dd1, dd2, dd3, dd4, dd5, dd6, dd7;
uint8x8_t d1_u8, d2_u8, d3_u8, d4_u8, d5_u8, d6_u8, d7_u8, t5, t6, t7;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
const uint8_t *s = src_ptr + (5 * src_stride);
@@ -2283,7 +2283,7 @@
s4 = vreinterpretq_s16_u16(vmovl_u8(t4));
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
s5 = vreinterpretq_s16_u16(vmovl_u8(t0));
@@ -2336,7 +2336,7 @@
s += 8 * src_stride;
d += 8 * dst_stride;
height -= 8;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s5 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
d0 = convolve6_8_y(s0, s1, s2, s3, s4, s5, y_filter, round_offset_vec);
@@ -2362,7 +2362,7 @@
s += src_stride;
d += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height != 0);
src_ptr += 8;
dst_ptr += 8;
@@ -2441,11 +2441,11 @@
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7;
uint16x4_t d0, dd0;
uint8x8_t t0, t1, t2, t3, t4, t5, t6, d01;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x4_t s8, s9, s10;
uint16x4_t d1, d2, d3, dd1, dd2, dd3;
uint8x8_t d23;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
const uint8_t *s = src_ptr;
@@ -2482,7 +2482,7 @@
s += 7 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
t0 = load_unaligned_u8_4x1(s + 0 * src_stride);
t1 = load_unaligned_u8_4x1(s + 1 * src_stride);
t2 = load_unaligned_u8_4x1(s + 2 * src_stride);
@@ -2538,7 +2538,7 @@
d += 4 * dst_stride;
d_u8 += 4 * dst8_stride;
height -= 4;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
t0 = load_unaligned_u8_4x1(s);
s7 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(t0)));
@@ -2570,7 +2570,7 @@
d += dst_stride;
d_u8 += dst8_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height != 0);
src_ptr += 4;
dst_ptr += 4;
@@ -2581,11 +2581,11 @@
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
uint16x8_t d0, dd0;
uint8x8_t d0_u8, t0, t1, t2, t3, t4, t5, t6;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int16x8_t s8, s9, s10, s11, s12, s13, s14;
uint16x8_t d1, d2, d3, d4, d5, d6, d7, dd1, dd2, dd3, dd4, dd5, dd6, dd7;
uint8x8_t d1_u8, d2_u8, d3_u8, d4_u8, d5_u8, d6_u8, d7_u8, t7;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
do {
const uint8_t *s = src_ptr;
@@ -2614,7 +2614,7 @@
s += 7 * src_stride;
do {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
s7 = vreinterpretq_s16_u16(vmovl_u8(t0));
@@ -2685,7 +2685,7 @@
s += 8 * src_stride;
d += 8 * dst_stride;
height -= 8;
-#else // !defined(__aarch64__)
+#else // !AOM_ARCH_AARCH64
s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
__builtin_prefetch(dst_ptr);
@@ -2718,7 +2718,7 @@
s += src_stride;
d += dst_stride;
height--;
-#endif // defined(__aarch64__)
+#endif // AOM_ARCH_AARCH64
} while (height != 0);
src_ptr += 8;
dst_ptr += 8;
diff --git a/av1/common/arm/reconintra_neon.c b/av1/common/arm/reconintra_neon.c
index 43c470f..07e6b0b 100644
--- a/av1/common/arm/reconintra_neon.c
+++ b/av1/common/arm/reconintra_neon.c
@@ -126,7 +126,7 @@
out_45 = vmlaq_s16(out_45, vreinterpretq_s16_u16(p_b_hi), f5f4_hi);
int16x8_t out_67 = vmulq_s16(vreinterpretq_s16_u16(p_b_lo), f7f6_lo);
out_67 = vmlaq_s16(out_67, vreinterpretq_s16_u16(p_b_hi), f7f6_hi);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const int16x8_t out_0123 = vpaddq_s16(out_01, out_23);
const int16x8_t out_4567 = vpaddq_s16(out_45, out_67);
const int16x8_t out_01234567 = vpaddq_s16(out_0123, out_4567);
@@ -137,7 +137,7 @@
vqmovn_s32(vpaddlq_s16(out_67)));
const int16x8_t out_01234567 = vcombine_s16(
vqmovn_s32(vpaddlq_s16(out_0123)), vqmovn_s32(vpaddlq_s16(out_4567)));
-#endif // (__aarch64__)
+#endif // AOM_ARCH_AARCH64
const uint32x2_t out_r =
vreinterpret_u32_u8(vqmovun_s16(vrshrq_n_s16(out_01234567, 4)));
// Storing
diff --git a/av1/common/arm/wiener_convolve_neon.c b/av1/common/arm/wiener_convolve_neon.c
index 1d36f68..d7f511d 100644
--- a/av1/common/arm/wiener_convolve_neon.c
+++ b/av1/common/arm/wiener_convolve_neon.c
@@ -153,7 +153,7 @@
height = intermediate_height;
// For aarch_64.
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
int processed_height = 0;
uint16_t *d_tmp;
int width, remaining_height;
diff --git a/av1/encoder/arm/crc32/hash_crc32.c b/av1/encoder/arm/crc32/hash_crc32.c
index dd8685d..5c22e7d 100644
--- a/av1/encoder/arm/crc32/hash_crc32.c
+++ b/av1/encoder/arm/crc32/hash_crc32.c
@@ -37,7 +37,7 @@
const uint8_t *buf = p;
uint32_t crc = 0xFFFFFFFF;
-#if !defined(__aarch64__)
+#if !AOM_ARCH_AARCH64
// Align input to 8-byte boundary (only necessary for 32-bit builds.)
while (len && ((uintptr_t)buf & 7)) {
crc = __crc32cb(crc, *buf++);
diff --git a/av1/encoder/arm/neon/av1_error_neon.c b/av1/encoder/arm/neon/av1_error_neon.c
index 124c1fd..cd7b434 100644
--- a/av1/encoder/arm/neon/av1_error_neon.c
+++ b/av1/encoder/arm/neon/av1_error_neon.c
@@ -48,7 +48,7 @@
block_size -= 8;
} while (block_size != 0);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
*ssz = vaddvq_s64(sqcoeff);
return vaddvq_s64(error);
#else
diff --git a/av1/encoder/arm/neon/av1_fwd_txfm2d_neon.c b/av1/encoder/arm/neon/av1_fwd_txfm2d_neon.c
index 3640cf1..ee8b115 100644
--- a/av1/encoder/arm/neon/av1_fwd_txfm2d_neon.c
+++ b/av1/encoder/arm/neon/av1_fwd_txfm2d_neon.c
@@ -24,7 +24,7 @@
static INLINE void transpose_16bit_4x4(const int16x8_t *const in,
int16x8_t *const out) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const int16x8_t a0 = vzip1q_s16(in[0], in[1]);
const int16x8_t a1 = vzip1q_s16(in[2], in[3]);
#else
@@ -45,7 +45,7 @@
static INLINE void transpose_16bit_4x8(const int16x8_t *const in,
int16x8_t *const out) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const int16x8_t a0 = vzip1q_s16(in[0], in[1]);
const int16x8_t a1 = vzip1q_s16(in[2], in[3]);
const int16x8_t a2 = vzip1q_s16(in[4], in[5]);
@@ -67,7 +67,7 @@
const int32x4x2_t b13 =
vzipq_s32(vreinterpretq_s32_s16(a2), vreinterpretq_s32_s16(a3));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
out[0] = vreinterpretq_s16_s64(vzip1q_s64(vreinterpretq_s64_s32(b02.val[0]),
vreinterpretq_s64_s32(b13.val[0])));
out[1] = vreinterpretq_s16_s64(vzip2q_s64(vreinterpretq_s64_s32(b02.val[0]),
@@ -100,7 +100,7 @@
const int32x4_t zeros = vdupq_n_s32(0);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
out[0] = vreinterpretq_s16_s64(vzip1q_s64(vreinterpretq_s64_s32(b01.val[0]),
vreinterpretq_s64_s32(zeros)));
out[1] = vreinterpretq_s16_s64(vzip2q_s64(vreinterpretq_s64_s32(b01.val[0]),
@@ -149,7 +149,7 @@
const int32x4x2_t b37 = vzipq_s32(vreinterpretq_s32_s16(a26.val[1]),
vreinterpretq_s32_s16(a37.val[1]));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
out[0] = vreinterpretq_s16_s64(vzip1q_s64(vreinterpretq_s64_s32(b04.val[0]),
vreinterpretq_s64_s32(b15.val[0])));
out[1] = vreinterpretq_s16_s64(vzip2q_s64(vreinterpretq_s64_s32(b04.val[0]),
diff --git a/av1/encoder/arm/neon/av1_highbd_quantize_neon.c b/av1/encoder/arm/neon/av1_highbd_quantize_neon.c
index 197eae0..5481928 100644
--- a/av1/encoder/arm/neon/av1_highbd_quantize_neon.c
+++ b/av1/encoder/arm/neon/av1_highbd_quantize_neon.c
@@ -65,7 +65,7 @@
}
static INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
-#ifdef __aarch64__
+#if AOM_ARCH_AARCH64
return (uint16_t)vmaxvq_s16(v_eobmax);
#else
const int16x4_t v_eobmax_3210 =
diff --git a/av1/encoder/arm/neon/av1_k_means_neon.c b/av1/encoder/arm/neon/av1_k_means_neon.c
index d421f76..dd9ab9d 100644
--- a/av1/encoder/arm/neon/av1_k_means_neon.c
+++ b/av1/encoder/arm/neon/av1_k_means_neon.c
@@ -16,7 +16,7 @@
static int32x4_t k_means_multiply_add_neon(const int16x8_t a) {
const int32x4_t l = vmull_s16(vget_low_s16(a), vget_low_s16(a));
const int32x4_t h = vmull_s16(vget_high_s16(a), vget_high_s16(a));
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vpaddq_s32(l, h);
#else
const int32x2_t dl = vpadd_s32(vget_low_s32(l), vget_high_s32(l));
diff --git a/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c b/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c
index 3528105..18cd0ce 100644
--- a/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c
+++ b/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c
@@ -24,7 +24,7 @@
// Compute the sum of all pixel differences of this MB.
static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
return vaddlvq_s8(v_sum_diff_total);
#else
const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);
diff --git a/av1/encoder/arm/neon/encodetxb_neon.c b/av1/encoder/arm/neon/encodetxb_neon.c
index 0af2521..e312863 100644
--- a/av1/encoder/arm/neon/encodetxb_neon.c
+++ b/av1/encoder/arm/neon/encodetxb_neon.c
@@ -37,7 +37,7 @@
vcombine_s16(vqmovn_s32(coeffA), vqmovn_s32(coeffB));
const int16x8_t absAB = vqabsq_s16(coeffAB);
const int8x8_t absABs = vqmovn_s16(absAB);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const int8x16_t absAB8 =
vcombine_s8(absABs, vreinterpret_s8_s32(vget_low_s32(zeros)));
const uint8x16_t lsAB =
@@ -188,7 +188,7 @@
static INLINE uint8x16_t load_8bit_4x4_to_1_reg(const uint8_t *const src,
const int byte_stride) {
-#ifdef __aarch64__
+#if AOM_ARCH_AARCH64
uint32x4_t v_data = vld1q_u32((uint32_t *)src);
v_data = vld1q_lane_u32((uint32_t *)(src + 1 * byte_stride), v_data, 1);
v_data = vld1q_lane_u32((uint32_t *)(src + 2 * byte_stride), v_data, 2);
@@ -202,7 +202,7 @@
static INLINE uint8x16_t load_8bit_8x2_to_1_reg(const uint8_t *const src,
const int byte_stride) {
-#ifdef __aarch64__
+#if AOM_ARCH_AARCH64
uint64x2_t v_data = vld1q_u64((uint64_t *)src);
v_data = vld1q_lane_u64((uint64_t *)(src + 1 * byte_stride), v_data, 1);
diff --git a/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c b/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
index 64e4dae..15d375a 100644
--- a/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
+++ b/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c
@@ -47,7 +47,7 @@
return x;
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
#define TRANSPOSE_4X4(x0, x1, x2, x3, y0, y1, y2, y3) \
do { \
int32x4x2_t swap_low = vtrnq_s32(x0, x1); \
@@ -79,7 +79,7 @@
y3 = vextq_s32(swap_low.val[1], \
vextq_s32(swap_high.val[1], swap_high.val[1], 2), 2); \
} while (0)
-#endif // (__aarch64__)
+#endif // AOM_ARCH_AARCH64
static INLINE void transpose_4x4(const int32x4_t *in, int32x4_t *out) {
TRANSPOSE_4X4(in[0], in[1], in[2], in[3], out[0], out[1], out[2], out[3]);
diff --git a/av1/encoder/arm/neon/ml_neon.c b/av1/encoder/arm/neon/ml_neon.c
index fcff3a9..f456f55 100644
--- a/av1/encoder/arm/neon/ml_neon.c
+++ b/av1/encoder/arm/neon/ml_neon.c
@@ -46,7 +46,7 @@
vadd = vmlaq_f32(vadd, inputs_h, weights_h);
vadd = vmlaq_f32(vadd, inputs_l, weights_l);
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
total += vaddvq_f32(vadd);
#else
float32x2_t vadd_lo = vadd_f32(vget_low_f32(vadd), vget_high_f32(vadd));
@@ -80,7 +80,7 @@
j -= 8;
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
total += vaddvq_f32(vadd);
#else
@@ -98,7 +98,7 @@
const float *layer_bias,
float *const output_nodes) {
float total = *layer_bias;
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const float32x4_t v_inputs = vld1q_f32(inputs);
const float32x4_t v_weights = vld1q_f32(weights);
const float32x4_t vadd = vmulq_f32(v_inputs, v_weights);
@@ -126,7 +126,7 @@
vadd = vmlaq_f32(vadd, v_inputs, v_weights);
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
total += vaddvq_f32(vadd);
#else
float32x2_t vadd_lo = vadd_f32(vget_low_f32(vadd), vget_high_f32(vadd));
@@ -159,7 +159,7 @@
}
}
for (int i = 0; i < 2; i++)
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
mul0[i] = vpaddq_f32(mul0[i], mul1[i]);
const float32x4_t hh = vpaddq_f32(mul0[0], mul0[1]);
#else
@@ -197,7 +197,7 @@
}
}
for (int i = 0; i < 4; i++)
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
mul0[i] = vpaddq_f32(mul0[i], mul1[i]);
const float32x4_t hh0 = vpaddq_f32(mul0[0], mul0[1]);
const float32x4_t hh1 = vpaddq_f32(mul0[2], mul0[3]);
@@ -239,7 +239,7 @@
add[i] = vmlaq_f32(add[i], inputs_h, weight_h);
}
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const float32x4_t hadd_h = vpaddq_f32(add[2], add[3]);
const float32x4_t hadd_l = vpaddq_f32(add[0], add[1]);
const float32x4_t haddhadd = vpaddq_f32(hadd_l, hadd_h);
diff --git a/av1/encoder/arm/neon/picksrt_neon.c b/av1/encoder/arm/neon/picksrt_neon.c
index a1e7765..1346d6b 100644
--- a/av1/encoder/arm/neon/picksrt_neon.c
+++ b/av1/encoder/arm/neon/picksrt_neon.c
@@ -141,10 +141,10 @@
}
sum64 = vpaddlq_u32(err0);
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
err += vaddvq_u64(sum64);
#else
err += vget_lane_u64(vadd_u64(vget_low_u64(sum64), vget_high_u64(sum64)), 0);
-#endif // __aarch64__
+#endif // AOM_ARCH_AARCH64
return err;
}
diff --git a/av1/encoder/arm/neon/quantize_neon.c b/av1/encoder/arm/neon/quantize_neon.c
index dbfbeef..c590702 100644
--- a/av1/encoder/arm/neon/quantize_neon.c
+++ b/av1/encoder/arm/neon/quantize_neon.c
@@ -26,7 +26,7 @@
#include "av1/encoder/rd.h"
static INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
-#ifdef __aarch64__
+#if AOM_ARCH_AARCH64
return (uint16_t)vmaxvq_s16(v_eobmax);
#else
const int16x4_t v_eobmax_3210 =
diff --git a/av1/encoder/arm/neon/rdopt_neon.c b/av1/encoder/arm/neon/rdopt_neon.c
index 25df6b4..cf443fd 100644
--- a/av1/encoder/arm/neon/rdopt_neon.c
+++ b/av1/encoder/arm/neon/rdopt_neon.c
@@ -97,7 +97,7 @@
v_x_sum = vpadalq_s32(v_x_sum, x_sum_32);
v_x2_sum = vpadalq_s32(v_x2_sum, x2_sum_32);
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
xy_sum = vaddvq_s64(v_xy_sum);
xz_sum = vaddvq_s64(v_xz_sum);
x2_sum = vaddvq_s64(v_x2_sum);
@@ -160,7 +160,7 @@
v_y2_sum = vmlal_s16(v_y2_sum, v_y_hi, v_y_hi);
const int32x4_t v_y_sum_a = vpadalq_s16(v_y_sum, v_y);
const int64x2_t v_xy_sum2 = vpaddlq_s32(v_xy_sum_a);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
const int64x2_t v_y2_sum_a = vpaddlq_s32(v_y2_sum);
xy_sum += vaddvq_s64(v_xy_sum2);
const int32_t y = vaddvq_s32(v_y_sum_a);
@@ -278,7 +278,7 @@
v_x_sum_a = vpadalq_s16(v_x_sum_a, v_y);
v_x_sum_a = vpadalq_s16(v_x_sum_a, v_w);
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
xy_sum += vaddvq_s64(vpaddlq_s32(v_xy_sum_a));
xz_sum += vaddvq_s64(vpaddlq_s32(v_xz_sum_a));
x_sum += vaddvq_s32(v_x_sum_a);
@@ -398,7 +398,7 @@
v_x2_firstrow = vmlal_s16(v_x2_firstrow, v_diff_lo, v_diff_lo);
v_x2_firstrow = vmlal_s16(v_x2_firstrow, v_diff_hi, v_diff_hi);
}
-#if defined(__aarch64__)
+#if AOM_ARCH_AARCH64
x_firstrow += vaddvq_s32(v_x_firstrow);
x2_firstrow += vaddvq_s32(v_x2_firstrow);
#else
diff --git a/av1/encoder/arm/neon/temporal_filter_neon.c b/av1/encoder/arm/neon/temporal_filter_neon.c
index c89e83e..4d14845 100644
--- a/av1/encoder/arm/neon/temporal_filter_neon.c
+++ b/av1/encoder/arm/neon/temporal_filter_neon.c
@@ -21,7 +21,7 @@
// For the squared error buffer, add padding for 4 samples.
#define SSE_STRIDE (BW + 4)
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
// clang-format off
@@ -192,7 +192,7 @@
}
}
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
// When using vld1q_u16_x4 compilers may insert an alignment hint of 256 bits.
DECLARE_ALIGNED(32, static const uint16_t, kSlidingWindowMask[]) = {
@@ -350,7 +350,7 @@
}
}
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
void av1_apply_temporal_filter_neon(
const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
@@ -392,11 +392,11 @@
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
double d_factor[4] = { 0 };
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
uint8_t frame_abs_diff[SSE_STRIDE * BH] = { 0 };
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
uint16_t frame_sse[SSE_STRIDE * BH] = { 0 };
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
uint32_t luma_sse_sum[BW * BH] = { 0 };
for (int subblock_idx = 0; subblock_idx < 4; subblock_idx++) {
@@ -435,7 +435,7 @@
// search is only done on Y-plane, so the information from Y-plane
// will be more accurate. The luma sse sum is reused in both chroma
// planes.
-#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#if AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
if (plane == AOM_PLANE_U) {
for (unsigned int i = 0; i < plane_h; i++) {
for (unsigned int j = 0; j < plane_w; j++) {
@@ -460,7 +460,7 @@
count + plane_offset, frame_abs_diff, luma_sse_sum,
inv_num_ref_pixels, decay_factor, inv_factor,
weight_factor, d_factor, tf_wgt_calc_lvl);
-#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+#else // !(AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD))
if (plane == AOM_PLANE_U) {
for (unsigned int i = 0; i < plane_h; i++) {
for (unsigned int j = 0; j < plane_w; j++) {
@@ -483,7 +483,7 @@
count + plane_offset, frame_sse, luma_sse_sum,
inv_num_ref_pixels, decay_factor, inv_factor,
weight_factor, d_factor, tf_wgt_calc_lvl);
-#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+#endif // AOM_ARCH_AARCH64 && defined(__ARM_FEATURE_DOTPROD)
plane_offset += plane_h * plane_w;
}