Clean up aom_subpixel_8t_intrin_sse2.c

Removed commented-out code from the
aom_filter_block1d16_h4_sse2 and aom_filter_block1d16_v4_sse2 functions.
Modified the unit tests to use the USE_4_TAPS and USE_8_TAPS macros.

Change-Id: I9f251cc10b0846ad29f719b320bdbbf2b2e56458
diff --git a/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c b/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
index d307312..cff7f43 100644
--- a/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
+++ b/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
@@ -31,9 +31,6 @@
   addFilterReg32 = _mm_set1_epi16(32);
   filtersReg = _mm_loadu_si128((const __m128i *)filter);
   filtersReg = _mm_srai_epi16(filtersReg, 1);
-  // converting the 16 bit (short) to 8 bit (byte) and have the same data
-  // in both lanes of 128 bit register.
-  // filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
 
   // coeffs 0 1 0 1 2 3 2 3
   const __m128i tmp_0 = _mm_unpacklo_epi32(filtersReg, filtersReg);
@@ -127,8 +124,6 @@
 
   addFilterReg32 = _mm_set1_epi16(32);
   filtersReg = _mm_loadu_si128((const __m128i *)filter);
-  // converting the 16 bit (short) to  8 bit (byte) and have the
-  // same data in both lanes of 128 bit register.
   filtersReg = _mm_srai_epi16(filtersReg, 1);
 
   // coeffs 0 1 0 1 2 3 2 3
@@ -139,7 +134,7 @@
   secondFilters = _mm_unpackhi_epi64(tmp0, tmp0);  // coeffs 2 3 2 3 2 3 2 3
   thirdFilters = _mm_unpacklo_epi64(tmp1, tmp1);   // coeffs 4 5 4 5 4 5 4 5
 
-  // multiple the size of the source and destination stride by two
+  // multiply the size of the source and destination stride by two
   src_stride = src_pitch << 1;
   dst_stride = out_pitch << 1;
 
diff --git a/test/comp_avg_pred_test.h b/test/comp_avg_pred_test.h
index 54bf281..dd53e0a 100644
--- a/test/comp_avg_pred_test.h
+++ b/test/comp_avg_pred_test.h
@@ -215,7 +215,8 @@
     jnt_comp_params.use_jnt_comp_avg = 1;
     int sub_x_q3, sub_y_q3;
     int subpel_search;
-    for (subpel_search = 1; subpel_search <= 2; ++subpel_search) {
+    for (subpel_search = USE_4_TAPS; subpel_search <= USE_8_TAPS;
+         ++subpel_search) {
       for (sub_x_q3 = 0; sub_x_q3 < 8; ++sub_x_q3) {
         for (sub_y_q3 = 0; sub_y_q3 < 8; ++sub_y_q3) {
           for (int ii = 0; ii < 2; ii++) {
@@ -283,7 +284,7 @@
     const int num_loops = 1000000000 / (in_w + in_h);
     aom_usec_timer timer;
     aom_usec_timer_start(&timer);
-    int subpel_search = 2;  // set to 1 to test 4-tap filter.
+    int subpel_search = USE_8_TAPS;  // set to USE_4_TAPS to test 4-tap filter.
 
     for (int i = 0; i < num_loops; ++i)
       aom_jnt_comp_avg_upsampled_pred_c(NULL, NULL, 0, 0, NULL, output, pred8,