Enable temporal filter intrinsics for YUV422

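The SSE2/AVX2 variants of av1_apply_temporal_filter() and
av1_highbd_apply_temporal_filter() were disabled for the YUV 4:2:2
format, because an assertion in the intrinsic code only accepted
square 16x16 or 32x32 blocks, whereas 4:2:2 chroma planes have
different horizontal and vertical subsampling and are therefore not
square. This CL relaxes that assertion in each intrinsic file so that
the block width and height may each independently be 16 or 32, and
re-enables the intrinsics for 4:2:2 input. It also fixes the assert
messages in the SSE2 files, which mistakenly referred to avx2.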

BUG=aomedia:2643

Change-Id: I8b5f525f650abd62271c11c16a4614c956eb8aca
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 59a4570..4a5d2f1 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -814,17 +814,6 @@
   }
   MB_MODE_INFO **input_mb_mode_info = mbd->mi;
 
-  // Determine whether the video is with `YUV 4:2:2` format, since the avx2/sse2
-  // function only supports square block size. We will use C function instead
-  // for videos with `YUV 4:2:2` format.
-  int is_yuv422_format = 0;
-  for (int plane = 1; plane < num_planes; ++plane) {
-    if (mbd->plane[plane].subsampling_x != mbd->plane[plane].subsampling_y) {
-      is_yuv422_format = 1;
-      break;
-    }
-  }
-
   // Setup.
   mbd->block_ref_scale_factors[0] = scale;
   mbd->block_ref_scale_factors[1] = scale;
@@ -882,13 +871,10 @@
         } else {  // Other reference frames.
           // TODO(any): avx2/sse2 version should be changed to align with C
           // function before using. In particular, current avx2/sse2 function
-          // only supports 32x32 block size, 5x5 filtering window, 8-bit
-          // encoding, and the case when the video is not with `YUV 4:2:2`
-          // format.
+          // only supports 32x32 block size and 5x5 filtering window.
           if (is_frame_high_bitdepth(frame_to_filter)) {  // for high bit-depth
 #if CONFIG_AV1_HIGHBITDEPTH
-            if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5 &&
-                !is_yuv422_format) {
+            if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
               av1_highbd_apply_temporal_filter(
                   frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
                   noise_levels, subblock_mvs, subblock_mses, q_factor,
@@ -903,8 +889,7 @@
             }
 #endif              // CONFIG_AV1_HIGHBITDEPTH
           } else {  // for 8-bit
-            if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5 &&
-                !is_yuv422_format) {
+            if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
               av1_apply_temporal_filter(
                   frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
                   noise_levels, subblock_mvs, subblock_mses, q_factor,
diff --git a/av1/encoder/x86/highbd_temporal_filter_sse2.c b/av1/encoder/x86/highbd_temporal_filter_sse2.c
index c0d214f..6ee418a 100644
--- a/av1/encoder/x86/highbd_temporal_filter_sse2.c
+++ b/av1/encoder/x86/highbd_temporal_filter_sse2.c
@@ -96,8 +96,8 @@
     uint32_t *frame_sse, uint32_t *luma_sse_sum, int bd,
     const double inv_num_ref_pixels, const double decay_factor,
     const double inv_factor, const double weight_factor, double *d_factor) {
-  assert(((block_width == 32) && (block_height == 32)) ||
-         ((block_width == 16) && (block_height == 16)));
+  assert(((block_width == 16) || (block_width == 32)) &&
+         ((block_height == 16) || (block_height == 32)));
 
   uint32_t acc_5x5_sse[BH][BW];
 
@@ -212,8 +212,8 @@
     const int *subblock_mses, const int q_factor, const int filter_strength,
     const uint8_t *pred, uint32_t *accum, uint16_t *count) {
   const int is_high_bitdepth = frame_to_filter->flags & YV12_FLAG_HIGHBITDEPTH;
-  assert(block_size == BLOCK_32X32 && "Only support 32x32 block with avx2!");
-  assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with avx2!");
+  assert(block_size == BLOCK_32X32 && "Only support 32x32 block with sse2!");
+  assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with sse2!");
   assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
   (void)is_high_bitdepth;
 
diff --git a/av1/encoder/x86/temporal_filter_avx2.c b/av1/encoder/x86/temporal_filter_avx2.c
index 65f7fe4..8b213e6 100644
--- a/av1/encoder/x86/temporal_filter_avx2.c
+++ b/av1/encoder/x86/temporal_filter_avx2.c
@@ -134,8 +134,8 @@
     uint16_t *frame_sse, uint32_t *luma_sse_sum,
     const double inv_num_ref_pixels, const double decay_factor,
     const double inv_factor, const double weight_factor, double *d_factor) {
-  assert(((block_width == 32) && (block_height == 32)) ||
-         ((block_width == 16) && (block_height == 16)));
+  assert(((block_width == 16) || (block_width == 32)) &&
+         ((block_height == 16) || (block_height == 32)));
 
   uint32_t acc_5x5_sse[BH][BW];
 
diff --git a/av1/encoder/x86/temporal_filter_sse2.c b/av1/encoder/x86/temporal_filter_sse2.c
index b366d0f..9d9a1d2 100644
--- a/av1/encoder/x86/temporal_filter_sse2.c
+++ b/av1/encoder/x86/temporal_filter_sse2.c
@@ -109,8 +109,8 @@
     uint16_t *frame_sse, uint32_t *luma_sse_sum,
     const double inv_num_ref_pixels, const double decay_factor,
     const double inv_factor, const double weight_factor, double *d_factor) {
-  assert(((block_width == 32) && (block_height == 32)) ||
-         ((block_width == 16) && (block_height == 16)));
+  assert(((block_width == 16) || (block_width == 32)) &&
+         ((block_height == 16) || (block_height == 32)));
 
   uint32_t acc_5x5_sse[BH][BW];
 
@@ -199,8 +199,8 @@
     const int *subblock_mses, const int q_factor, const int filter_strength,
     const uint8_t *pred, uint32_t *accum, uint16_t *count) {
   const int is_high_bitdepth = frame_to_filter->flags & YV12_FLAG_HIGHBITDEPTH;
-  assert(block_size == BLOCK_32X32 && "Only support 32x32 block with avx2!");
-  assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with avx2!");
+  assert(block_size == BLOCK_32X32 && "Only support 32x32 block with sse2!");
+  assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with sse2!");
   assert(!is_high_bitdepth && "Only support low bit-depth with sse2!");
   assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
   (void)is_high_bitdepth;