Highbd parallel_deblocking SSE2 optimization

- Decoder speed improves by ~13.7% (baseline + parallel_deblocking).
- The highbd loopfilter AVX2 version works when this experiment is
  disabled.

Change-Id: I5d56b137a1d52236a4735656c370d57ef71ae043
diff --git a/test/lpf_8_test.cc b/test/lpf_8_test.cc
index 30ef04a..5cbd92e 100644
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -499,7 +499,7 @@
 
 using std::tr1::make_tuple;
 
-#if HAVE_SSE2 && (!CONFIG_PARALLEL_DEBLOCKING)
+#if HAVE_SSE2
 #if CONFIG_HIGHBITDEPTH
 
 const loop8_param_t kHbdLoop8Test6[] = {
@@ -550,6 +550,7 @@
 INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
                         ::testing::ValuesIn(kHbdLoop8Test6));
 #else
+#if !CONFIG_PARALLEL_DEBLOCKING
 const loop8_param_t kLoop8Test6[] = {
   make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
   make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
@@ -564,10 +565,11 @@
 
 INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param,
                         ::testing::ValuesIn(kLoop8Test6));
-#endif  // CONFIG_HIGHBITDEPTH
 #endif
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSE2
 
-#if HAVE_AVX2 && (!CONFIG_PARALLEL_DEBLOCKING)
+#if HAVE_AVX2
 #if CONFIG_HIGHBITDEPTH
 
 const loop8_param_t kHbdLoop8Test6Avx2[] = {
@@ -600,7 +602,7 @@
                                  &aom_lpf_horizontal_edge_16_c, 8)));
 #endif
 
-#if HAVE_SSE2 && (!CONFIG_PARALLEL_DEBLOCKING)
+#if HAVE_SSE2
 #if CONFIG_HIGHBITDEPTH
 const dualloop8_param_t kHbdLoop8Test9[] = {
   make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
@@ -632,6 +634,7 @@
 INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
                         ::testing::ValuesIn(kHbdLoop8Test9));
 #else
+#if !CONFIG_PARALLEL_DEBLOCKING
 const dualloop8_param_t kLoop8Test9[] = {
   make_tuple(&aom_lpf_horizontal_4_dual_sse2, &aom_lpf_horizontal_4_dual_c, 8),
   make_tuple(&aom_lpf_horizontal_8_dual_sse2, &aom_lpf_horizontal_8_dual_c, 8),
@@ -641,10 +644,11 @@
 
 INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test9Param,
                         ::testing::ValuesIn(kLoop8Test9));
-#endif  // CONFIG_HIGHBITDEPTH
 #endif
+#endif  // CONFIG_HIGHBITDEPTH
+#endif  // HAVE_SSE2
 
-#if HAVE_AVX2 && (!CONFIG_PARALLEL_DEBLOCKING)
+#if HAVE_AVX2
 #if CONFIG_HIGHBITDEPTH
 const dualloop8_param_t kHbdLoop8Test9Avx2[] = {
   make_tuple(&aom_highbd_lpf_horizontal_4_dual_avx2,