SSE2 optimization for lpf 16_dual implementations
Covers the horizontal and vertical variants,
for both low and high bitdepth types.
The corresponding tests are enabled.
Performance changes, SSE2 over C:
Horizontal methods: up to 3x
Vertical methods: up to 2x
Change-Id: If430a916394c7befa743e4fbaa9913fd37c535ed
diff --git a/test/lpf_test.cc b/test/lpf_test.cc
index f3d0aa1..afde34f 100644
--- a/test/lpf_test.cc
+++ b/test/lpf_test.cc
@@ -450,6 +450,8 @@
8),
make_tuple(&aom_highbd_lpf_horizontal_16_sse2,
&aom_highbd_lpf_horizontal_16_c, 8),
+ make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2,
+ &aom_highbd_lpf_horizontal_16_dual_c, 8),
make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8),
make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
8),
@@ -460,6 +462,8 @@
10),
make_tuple(&aom_highbd_lpf_horizontal_16_sse2,
&aom_highbd_lpf_horizontal_16_c, 10),
+ make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2,
+ &aom_highbd_lpf_horizontal_16_dual_c, 10),
make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10),
make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
10),
@@ -470,6 +474,16 @@
12),
make_tuple(&aom_highbd_lpf_horizontal_16_sse2,
&aom_highbd_lpf_horizontal_16_c, 12),
+ make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2,
+ &aom_highbd_lpf_horizontal_16_dual_c, 12),
+ make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
+ 12),
+ make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
+ &aom_highbd_lpf_vertical_16_dual_c, 8),
+ make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
+ &aom_highbd_lpf_vertical_16_dual_c, 10),
+ make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
+ &aom_highbd_lpf_vertical_16_dual_c, 12),
make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12)
};
@@ -482,9 +496,12 @@
make_tuple(&aom_lpf_horizontal_6_sse2, &aom_lpf_horizontal_6_c, 8),
make_tuple(&aom_lpf_vertical_6_sse2, &aom_lpf_vertical_6_c, 8),
make_tuple(&aom_lpf_horizontal_16_sse2, &aom_lpf_horizontal_16_c, 8),
+ make_tuple(&aom_lpf_horizontal_16_dual_sse2, &aom_lpf_horizontal_16_dual_c,
+ 8),
make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
make_tuple(&aom_lpf_vertical_16_sse2, &aom_lpf_vertical_16_c, 8),
+ make_tuple(&aom_lpf_vertical_16_dual_sse2, &aom_lpf_vertical_16_dual_c, 8)
};
INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_lbd,