Fix aarch64 compilation with GCC < 8.5.0

Tested with aarch64-linux-gnu-gcc (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04).

This adds fallback definitions of `vld1q_u16_x2` (available in GCC since
8.1.0 [1]) and of `vst1q_u16_x2` and `vst1q_u16_x4` (available in GCC
since 8.5.0 [2]) to mem_neon.h for compilers that lack these intrinsics.

[1]: https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/ChangeLog-2017;h=122911df18051992a4e831af8d3d09430dde553a;hb=406c2abec3f998e9064919b22db62f38a7c0e7b9#l90
[2]: https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/ChangeLog;h=b9c7f42907b9546dd53569b67754a23e4b358fdf;hb=eafe83f2f20ef0c1e7703c361ba314b44574523c#l2019

Bug: aomedia:419829066
Change-Id: I26dfedd14883ab87585f57691a6e3c8c64e5f4a9
(cherry picked from commit 146ad35ccc6c3156941e5743c0261811fe59c242)
diff --git a/aom_dsp/arm/mem_neon.h b/aom_dsp/arm/mem_neon.h
index 9e7a0cc..ad761de 100644
--- a/aom_dsp/arm/mem_neon.h
+++ b/aom_dsp/arm/mem_neon.h
@@ -98,6 +98,11 @@
   return res;
 }
 
+static inline uint16x8x2_t vld1q_u16_x2(const uint16_t *ptr) {
+  uint16x8x2_t res = { { vld1q_u16(ptr + 0 * 8), vld1q_u16(ptr + 1 * 8) } };
+  return res;
+}
+
 static inline int16x8x2_t vld1q_s16_x2(const int16_t *ptr) {
   int16x8x2_t res = { { vld1q_s16(ptr + 0 * 8), vld1q_s16(ptr + 1 * 8) } };
   return res;
@@ -136,6 +141,18 @@
   vst1_u8(ptr + 2 * 8, a.val[2]);
   vst1_u8(ptr + 3 * 8, a.val[3]);
 }
+
+static inline void vst1q_u16_x2(uint16_t *ptr, uint16x8x2_t a) {
+  vst1q_u16(ptr + 0 * 8, a.val[0]);
+  vst1q_u16(ptr + 1 * 8, a.val[1]);
+}
+
+static inline void vst1q_u16_x4(uint16_t *ptr, uint16x8x4_t a) {
+  vst1q_u16(ptr + 0 * 8, a.val[0]);
+  vst1q_u16(ptr + 1 * 8, a.val[1]);
+  vst1q_u16(ptr + 2 * 8, a.val[2]);
+  vst1q_u16(ptr + 3 * 8, a.val[3]);
+}
 #endif  // ((__GNUC__ << 8) | __GNUC_MINOR__) < 0x805
 #endif  // defined(__GNUC__) && !defined(__clang__)