Vectorize self-guided filter

Add an SSE4.1 low-bitdepth (lowbd) version of the self-guided filter for loop restoration, and apply some optimizations to the C version.

Approximate times per 128x128 / 256x256 tile on the machine this was developed on:
  Previous C:  620us / 2800us
  Optimized C: 500us / 2200us (24% / 27% faster)
  SSE4.1:      147us / 600us (320% / 370% faster)

Change-Id: I23ff5a5482a191aeb06f9d1f767a9f036bb357fe

commit: ce110cc563d89885d2d8e20c2f844c57642a78d3 [log] [tgz]
author: David Barker <david.barker@argondesign.com> Wed Feb 22 10:38:59 2017 +0000
committer: Debargha Mukherjee <debargha@google.com> Mon Mar 06 20:23:16 2017 +0000
tree: 5f4f3acd867388aaea213a739aba9135779fd0fb
parent: 4d5bbbd907a1eb7acdafe2a4a7594721ec33a850 [diff] [blame]
diff --git a/test/test.mk b/test/test.mk
index 01014e6..8ffa87a 100644
--- a/test/test.mk
+++ b/test/test.mk

@@ -209,6 +209,9 @@
 ifneq ($(findstring yes,$(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION)),)
 LIBAOM_TEST_SRCS-$(HAVE_SSE2) += warp_filter_test.cc
 endif
+ifeq ($(CONFIG_LOOP_RESTORATION),yes)
+LIBAOM_TEST_SRCS-$(HAVE_SSE4_1) += selfguided_filter_test.cc
+endif
 
 TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
 TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
commit	ce110cc563d89885d2d8e20c2f844c57642a78d3	[log] [tgz]
author	David Barker <david.barker@argondesign.com>	Wed Feb 22 10:38:59 2017 +0000
committer	Debargha Mukherjee <debargha@google.com>	Mon Mar 06 20:23:16 2017 +0000
tree	5f4f3acd867388aaea213a739aba9135779fd0fb
parent	4d5bbbd907a1eb7acdafe2a4a7594721ec33a850 [diff] [blame]