Add SIMD code for block copies.

Change-Id: I696da03fb5e9e87d054a9aa9238ad96937a0e281

commit: 54170d92c9df49e2176f45dcbb6603f67b6bada9 [log] [tgz]
author: Michael Bebenita <mbebenita@mozilla.com> Fri Mar 31 15:48:44 2017 -0700
committer: Jean-Marc Valin <jvalin@mozilla.com> Sat Apr 01 04:44:25 2017 +0000
tree: f65a3184bc2b04c03d9eff9637fe8f02a24f71b5
parent: 19f76635746c20cf5be943c1d2e90f3fbfd3349c [diff] [blame]
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 0f47410..3713e11 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl

@@ -628,7 +628,15 @@
   add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
   add_proto qw/int od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
   add_proto qw/int od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
-  # VS compiling for 32 bit targets does not support vector types in
+
+  add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
+  add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
+  add_proto qw/void copy_8x8_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
+  add_proto qw/void copy_4x4_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride";
+  add_proto qw/void copy_nxm_8bit_to_16bit/, "uint16_t *dst, int dstride, const uint8_t *src, int sstride, int n, int m";
+  add_proto qw/void copy_nxm_16bit_to_16bit/, "uint16_t *dst, int dstride, const uint16_t *src, int sstride, int n, int m";
+
+# VS compiling for 32 bit targets does not support vector types in
   # structs as arguments, which makes the v256 type of the intrinsics
   # hard to support, so optimizations for this target are disabled.
   if ($opts{config} !~ /libs-x86-win32-vs.*/) {
@@ -639,6 +647,13 @@
     specialize qw/od_dir_find8 sse2 ssse3 sse4_1 neon/;
     specialize qw/od_filter_dering_direction_4x4 sse2 ssse3 sse4_1 neon/;
     specialize qw/od_filter_dering_direction_8x8 sse2 ssse3 sse4_1 neon/;
+
+    specialize qw/copy_8x8_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
+    specialize qw/copy_4x4_16bit_to_8bit sse2 ssse3 sse4_1 neon/;
+    specialize qw/copy_8x8_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
+    specialize qw/copy_4x4_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
+    specialize qw/copy_nxm_8bit_to_16bit sse2 ssse3 sse4_1 neon/;
+    specialize qw/copy_nxm_16bit_to_16bit sse2 ssse3 sse4_1 neon/;
   }
 }
commit	54170d92c9df49e2176f45dcbb6603f67b6bada9	[log] [tgz]
author	Michael Bebenita <mbebenita@mozilla.com>	Fri Mar 31 15:48:44 2017 -0700
committer	Jean-Marc Valin <jvalin@mozilla.com>	Sat Apr 01 04:44:25 2017 +0000
tree	f65a3184bc2b04c03d9eff9637fe8f02a24f71b5
parent	19f76635746c20cf5be943c1d2e90f3fbfd3349c [diff] [blame]