Implement av1_txb_init_levels_sse4_1

1. Add an sse4_1 version of av1_txb_init_levels.
2. The unit test shows it is 1.7x to 8.1x faster than the C version.
3. The encoder is about 1.7% faster, as shown by encoding 10 frames of foreman_cif.y4m.
   a) Compiler: gcc (Ubuntu 7.2.0-8ubuntu3.2) 7.2.0
   b) CPU: Intel(R) Core(TM) i7-6900K CPU @ 3.20GHz
   c) Config cmd: cmake ../ -DENABLE_CCACHE=1 -DCONFIG_LOWBITDEPTH=1
   d) Test cmd: ./aomenc --cpu-used=1 --end-usage=vbr \
      --target-bitrate=800 --limit=10

Change-Id: I0c2502c3efb39b0197da18aabb4e8255929fcb32

commit: 27d7ca9be8593140699dc212d3ca2b81d1292630 [log] [tgz]
author: Peng Bin <binpengsmail@gmail.com> Thu Mar 22 22:25:56 2018 +0800
committer: Angie Chiang <angiebird@google.com> Thu Mar 29 17:32:54 2018 +0000
tree: 15d6adaa89a1db02c85367adb4af00f0f635966f
parent: 3965d002df8f0b44466845839fe2967f7f9a05f1 [diff] [blame]
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 74b2fc4..c9f5904 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c

@@ -68,7 +68,7 @@
   av1_free_txb_buf(cpi);
   // TODO(jingning): This should be further reduced.
   CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
-                  aom_malloc(sizeof(*cpi->coeff_buffer_base) * size));
+                  aom_memalign(32, sizeof(*cpi->coeff_buffer_base) * size));
 }
 
 void av1_free_txb_buf(AV1_COMP *cpi) { aom_free(cpi->coeff_buffer_base); }
@@ -425,9 +425,8 @@
       qc, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix);
 }
 
-static INLINE void av1_txb_init_levels(const tran_low_t *const coeff,
-                                       const int width, const int height,
-                                       uint8_t *const levels) {
+void av1_txb_init_levels_c(const tran_low_t *const coeff, const int width,
+                           const int height, uint8_t *const levels) {
   const int stride = width + TX_PAD_HOR;
   uint8_t *ls = levels;
commit	27d7ca9be8593140699dc212d3ca2b81d1292630	[log] [tgz]
author	Peng Bin <binpengsmail@gmail.com>	Thu Mar 22 22:25:56 2018 +0800
committer	Angie Chiang <angiebird@google.com>	Thu Mar 29 17:32:54 2018 +0000
tree	15d6adaa89a1db02c85367adb4af00f0f635966f
parent	3965d002df8f0b44466845839fe2967f7f9a05f1 [diff] [blame]