Add s8 -> s16 unpack intrinsics

Change-Id: Iec22c6442c55a5908d858766ff6dfb8bff69835d
diff --git a/test/simd_impl.h b/test/simd_impl.h
index b643970..5cfda67 100644
--- a/test/simd_impl.h
+++ b/test/simd_impl.h
@@ -268,6 +268,8 @@
             SIMD_TUPLE(v64_abs_s16, 0U, 0U),
             SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U),
             SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U),
+            SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U),
             SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U),
             SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U),
             SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U),
@@ -311,11 +313,11 @@
             SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U),
-            SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U),
-            SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
-            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U));
+            SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2),
+            SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
+            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U),
@@ -441,9 +443,11 @@
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
             SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U),
             SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U),
             SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U),
             SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U),
             SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U),
             SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U),
             SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U),
@@ -483,11 +487,11 @@
             SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U));
+            SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
+            SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U),
@@ -532,11 +536,11 @@
             SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U));
+            SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
+            SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
+            SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U),
@@ -558,6 +562,7 @@
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64),
             SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U),
+            SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U),
             SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U),
             SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U));