Enable temporal filter intrinsics for YUV422
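The intrinsic (SSE2/AVX2) variants of av1_apply_temporal_filter() were
disabled for the YUV 4:2:2 format because their block-size assertions only
accepted square blocks (16x16 or 32x32). This CL relaxes those assertions so
that block width and height may each independently be 16 or 32, and
re-enables the intrinsics for YUV 4:2:2.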
BUG=aomedia:2643
Change-Id: I8b5f525f650abd62271c11c16a4614c956eb8aca
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 59a4570..4a5d2f1 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -814,17 +814,6 @@
}
MB_MODE_INFO **input_mb_mode_info = mbd->mi;
- // Determine whether the video is with `YUV 4:2:2` format, since the avx2/sse2
- // function only supports square block size. We will use C function instead
- // for videos with `YUV 4:2:2` format.
- int is_yuv422_format = 0;
- for (int plane = 1; plane < num_planes; ++plane) {
- if (mbd->plane[plane].subsampling_x != mbd->plane[plane].subsampling_y) {
- is_yuv422_format = 1;
- break;
- }
- }
-
// Setup.
mbd->block_ref_scale_factors[0] = scale;
mbd->block_ref_scale_factors[1] = scale;
@@ -882,13 +871,10 @@
} else { // Other reference frames.
// TODO(any): avx2/sse2 version should be changed to align with C
// function before using. In particular, current avx2/sse2 function
- // only supports 32x32 block size, 5x5 filtering window, 8-bit
- // encoding, and the case when the video is not with `YUV 4:2:2`
- // format.
+ // only supports 32x32 block size and 5x5 filtering window.
if (is_frame_high_bitdepth(frame_to_filter)) { // for high bit-depth
#if CONFIG_AV1_HIGHBITDEPTH
- if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5 &&
- !is_yuv422_format) {
+ if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
av1_highbd_apply_temporal_filter(
frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
noise_levels, subblock_mvs, subblock_mses, q_factor,
@@ -903,8 +889,7 @@
}
#endif // CONFIG_AV1_HIGHBITDEPTH
} else { // for 8-bit
- if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5 &&
- !is_yuv422_format) {
+ if (TF_BLOCK_SIZE == BLOCK_32X32 && TF_WINDOW_LENGTH == 5) {
av1_apply_temporal_filter(
frame_to_filter, mbd, block_size, mb_row, mb_col, num_planes,
noise_levels, subblock_mvs, subblock_mses, q_factor,
diff --git a/av1/encoder/x86/highbd_temporal_filter_sse2.c b/av1/encoder/x86/highbd_temporal_filter_sse2.c
index c0d214f..6ee418a 100644
--- a/av1/encoder/x86/highbd_temporal_filter_sse2.c
+++ b/av1/encoder/x86/highbd_temporal_filter_sse2.c
@@ -96,8 +96,8 @@
uint32_t *frame_sse, uint32_t *luma_sse_sum, int bd,
const double inv_num_ref_pixels, const double decay_factor,
const double inv_factor, const double weight_factor, double *d_factor) {
- assert(((block_width == 32) && (block_height == 32)) ||
- ((block_width == 16) && (block_height == 16)));
+ assert(((block_width == 16) || (block_width == 32)) &&
+ ((block_height == 16) || (block_height == 32)));
uint32_t acc_5x5_sse[BH][BW];
@@ -212,8 +212,8 @@
const int *subblock_mses, const int q_factor, const int filter_strength,
const uint8_t *pred, uint32_t *accum, uint16_t *count) {
const int is_high_bitdepth = frame_to_filter->flags & YV12_FLAG_HIGHBITDEPTH;
- assert(block_size == BLOCK_32X32 && "Only support 32x32 block with avx2!");
- assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with avx2!");
+ assert(block_size == BLOCK_32X32 && "Only support 32x32 block with sse2!");
+ assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with sse2!");
assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
(void)is_high_bitdepth;
diff --git a/av1/encoder/x86/temporal_filter_avx2.c b/av1/encoder/x86/temporal_filter_avx2.c
index 65f7fe4..8b213e6 100644
--- a/av1/encoder/x86/temporal_filter_avx2.c
+++ b/av1/encoder/x86/temporal_filter_avx2.c
@@ -134,8 +134,8 @@
uint16_t *frame_sse, uint32_t *luma_sse_sum,
const double inv_num_ref_pixels, const double decay_factor,
const double inv_factor, const double weight_factor, double *d_factor) {
- assert(((block_width == 32) && (block_height == 32)) ||
- ((block_width == 16) && (block_height == 16)));
+ assert(((block_width == 16) || (block_width == 32)) &&
+ ((block_height == 16) || (block_height == 32)));
uint32_t acc_5x5_sse[BH][BW];
diff --git a/av1/encoder/x86/temporal_filter_sse2.c b/av1/encoder/x86/temporal_filter_sse2.c
index b366d0f..9d9a1d2 100644
--- a/av1/encoder/x86/temporal_filter_sse2.c
+++ b/av1/encoder/x86/temporal_filter_sse2.c
@@ -109,8 +109,8 @@
uint16_t *frame_sse, uint32_t *luma_sse_sum,
const double inv_num_ref_pixels, const double decay_factor,
const double inv_factor, const double weight_factor, double *d_factor) {
- assert(((block_width == 32) && (block_height == 32)) ||
- ((block_width == 16) && (block_height == 16)));
+ assert(((block_width == 16) || (block_width == 32)) &&
+ ((block_height == 16) || (block_height == 32)));
uint32_t acc_5x5_sse[BH][BW];
@@ -199,8 +199,8 @@
const int *subblock_mses, const int q_factor, const int filter_strength,
const uint8_t *pred, uint32_t *accum, uint16_t *count) {
const int is_high_bitdepth = frame_to_filter->flags & YV12_FLAG_HIGHBITDEPTH;
- assert(block_size == BLOCK_32X32 && "Only support 32x32 block with avx2!");
- assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with avx2!");
+ assert(block_size == BLOCK_32X32 && "Only support 32x32 block with sse2!");
+ assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with sse2!");
assert(!is_high_bitdepth && "Only support low bit-depth with sse2!");
assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
(void)is_high_bitdepth;