Hybrid forward transforms 16x16 AVX2 optimization
- Unit tests are added for AVX2 SIMD.
- Encoder speed improvement:
AV1 baseline and EXT_TX, three 1080p sequences at bitrate:
800 Kbps, 2 Mbps, 6 Mbps, on i7-6700 CPU, average
user level time reduction: 3.86%.
Change-Id: Ibbd7837ee3a831c6b1e4e471bf6c8d3fa3a19ff4
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 4bf89f9..ad19c55 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -378,7 +378,7 @@
specialize qw/av1_fht8x8 sse2/;
add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
-specialize qw/av1_fht16x16 sse2/;
+specialize qw/av1_fht16x16 sse2 avx2/;
add_proto qw/void av1_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/av1_fht32x32/;