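Fix av1_inv_txfm2d_add_NxM_neon for 32-bit Arm

In the non-AArch64 fallback path, u0..u3 were built by combining the low
half of uNx.val[0] with the high half of uNx.val[1]. The AArch64 path uses
vzip1q_s64, which takes the low 64 bits of both operands, so the two paths
disagreed. Use vget_low_s32 for both halves in the 32-bit path so it
matches the AArch64 path.
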
BUG=aomedia:2698
Change-Id: I24731268a629ebe1c9e63632ce23d0b1ad78260d
diff --git a/av1/common/arm/highbd_inv_txfm_neon.c b/av1/common/arm/highbd_inv_txfm_neon.c
index 7234459..eaa5e1e 100644
--- a/av1/common/arm/highbd_inv_txfm_neon.c
+++ b/av1/common/arm/highbd_inv_txfm_neon.c
@@ -629,7 +629,7 @@
u0 = vreinterpretq_s32_s64(vzip1q_s64(vreinterpretq_s64_s32(u0x.val[0]),
vreinterpretq_s64_s32(u0x.val[1])));
#else
- u0 = vcombine_s32(vget_low_s32(u0x.val[0]), vget_high_s32(u0x.val[1]));
+ u0 = vcombine_s32(vget_low_s32(u0x.val[0]), vget_low_s32(u0x.val[1]));
#endif // (__aarch64__)
// u1
int32x4x2_t u1x;
@@ -654,7 +654,7 @@
u1 = vreinterpretq_s32_s64(vzip1q_s64(vreinterpretq_s64_s32(u1x.val[0]),
vreinterpretq_s64_s32(u1x.val[1])));
#else
- u1 = vcombine_s32(vget_low_s32(u1x.val[0]), vget_high_s32(u1x.val[1]));
+ u1 = vcombine_s32(vget_low_s32(u1x.val[0]), vget_low_s32(u1x.val[1]));
#endif // (__aarch64__)
// u2
@@ -680,7 +680,7 @@
u2 = vreinterpretq_s32_s64(vzip1q_s64(vreinterpretq_s64_s32(u2x.val[0]),
vreinterpretq_s64_s32(u2x.val[1])));
#else
- u2 = vcombine_s32(vget_low_s32(u2x.val[0]), vget_high_s32(u2x.val[1]));
+ u2 = vcombine_s32(vget_low_s32(u2x.val[0]), vget_low_s32(u2x.val[1]));
#endif // (__aarch64__)
// u3
@@ -706,7 +706,7 @@
u3 = vreinterpretq_s32_s64(vzip1q_s64(vreinterpretq_s64_s32(u3x.val[0]),
vreinterpretq_s64_s32(u3x.val[1])));
#else
- u3 = vcombine_s32(vget_low_s32(u3x.val[0]), vget_high_s32(u3x.val[1]));
+ u3 = vcombine_s32(vget_low_s32(u3x.val[0]), vget_low_s32(u3x.val[1]));
#endif // (__aarch64__)
out[0] = u0;
@@ -2771,7 +2771,7 @@
vmull_s32(vmovn_s64(vreinterpretq_s64_s32(in[i])), fact));
a0.val[0] = vreinterpretq_s32_s64(
vrshrq_n_s64(vreinterpretq_s64_s32(a0.val[0]), NewSqrt2Bits));
- a0.val[1] = vextq_s32(in[i], zero, 1); // 4
+ a0.val[1] = vextq_s32(in[i], zero, 1);
a0.val[1] = vreinterpretq_s32_s64(
vmull_s32(vmovn_s64(vreinterpretq_s64_s32(a0.val[1])), fact));
a0.val[1] = vreinterpretq_s32_s64(