Add aom_comp_mask_<upsampled>pred_ssse3

1) Overall encoder speed improves by ~1% with no impact on coding performance.
2) aom_comp_mask_pred_ssse3 is 3.5x - 6x faster than aom_comp_mask_pred_c.
3) aom_comp_mask_upsampled_pred_ssse3 is 1.5x - 3x faster than
aom_comp_mask_upsampled_pred_c; for the special case where
subpel_x == subpel_y == 0, the optimized version achieves a 4x - 7x speedup.

Unit tests for both functions have been added.
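
For reference, the per-pixel operation these kernels accelerate is a 6-bit
weighted blend of the two predictors. The scalar sketch below assumes the
usual AOM_BLEND_A64 semantics (mask values in [0, 64], rounding by 32 before
the shift); the function name, exact signature, and invert_mask handling are
illustrative assumptions, not the library's declaration.

  #include <stdint.h>

  #define MASK_MAX 64 /* assumed to match AOM_BLEND_A64_MAX_ALPHA */

  /* Scalar sketch of the masked compound prediction blend. */
  static void comp_mask_pred_sketch(uint8_t *comp_pred, const uint8_t *pred,
                                    int width, int height, const uint8_t *ref,
                                    int ref_stride, const uint8_t *mask,
                                    int mask_stride, int invert_mask) {
    for (int i = 0; i < height; ++i) {
      for (int j = 0; j < width; ++j) {
        /* Weight toward ref or pred depending on invert_mask (assumption). */
        const int m = invert_mask ? MASK_MAX - mask[j] : mask[j];
        /* Rounded 6-bit blend: (m * ref + (64 - m) * pred + 32) >> 6. */
        comp_pred[j] =
            (uint8_t)((m * ref[j] + (MASK_MAX - m) * pred[j] + 32) >> 6);
      }
      comp_pred += width;
      pred += width;
      ref += ref_stride;
      mask += mask_stride;
    }
  }

The SSSE3 versions vectorize this blend; the larger 4x - 7x gain in the
upsampled variant for subpel_x == subpel_y == 0 presumably comes from being
able to skip the upsampling interpolation entirely in that case.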

Change-Id: Ib498317975e0dbd9cdcf61be327b640dfac9a7e5
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index df92cfe..db5dfff 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -232,8 +232,10 @@
 
 static INLINE int is_any_masked_compound_used(BLOCK_SIZE sb_type) {
   COMPOUND_TYPE comp_type;
+  int i;
   if (!is_comp_ref_allowed(sb_type)) return 0;
-  for (comp_type = 0; comp_type < COMPOUND_TYPES; comp_type++) {
+  for (i = 0; i < COMPOUND_TYPES; i++) {
+    comp_type = (COMPOUND_TYPE)i;
     if (is_masked_compound_type(comp_type) &&
         is_interinter_compound_used(comp_type, sb_type))
       return 1;
@@ -286,8 +288,8 @@
   const int spel_right = spel_left - SUBPEL_SHIFTS;
   const int spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS;
   const int spel_bottom = spel_top - SUBPEL_SHIFTS;
-  MV clamped_mv = { src_mv->row * (1 << (1 - ss_y)),
-                    src_mv->col * (1 << (1 - ss_x)) };
+  MV clamped_mv = { (int16_t)(src_mv->row * (1 << (1 - ss_y))),
+                    (int16_t)(src_mv->col * (1 << (1 - ss_x))) };
   assert(ss_x <= 1);
   assert(ss_y <= 1);