[M102-LTS] Use non normative scaler for non optimized ratio

M102 merge issues:
  Had to change the author (jianj@google.com) to be able to upload the CL
  because of the following error:
    remote: ERROR: commit 83544bf: email address jianj@google.com is not registered in your account, and you lack 'forge author' permission.

Optimized scalers exist only for the 1/4, 1/2 and 3/4 scaling ratios.
SSSE3 also has a 2x upscaling optimization.
Use non normative scalers for all other scaling ratios.

Bug: chromium:1346938
Bug: chromium:1338114
Change-Id: I2a01717b56c53c42906440d5a3f95ca2c00dc571
(cherry picked from commit ff7b753a63a536423a91b64a066bd385c52ceacc)
diff --git a/av1/common/resize.c b/av1/common/resize.c index a3c3c0e..322363fa 100644 --- a/av1/common/resize.c +++ b/av1/common/resize.c
@@ -1366,15 +1366,20 @@ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate scaled buffer"); + const bool has_optimized_scaler = av1_has_optimized_scaler( + unscaled->y_crop_width, unscaled->y_crop_height, scaled_width, + scaled_height); + #if CONFIG_AV1_HIGHBITDEPTH - if (use_optimized_scaler && cm->seq_params->bit_depth == AOM_BITS_8) { + if (use_optimized_scaler && has_optimized_scaler && + cm->seq_params->bit_depth == AOM_BITS_8) { av1_resize_and_extend_frame(unscaled, scaled, filter, phase, num_planes); } else { av1_resize_and_extend_frame_nonnormative( unscaled, scaled, (int)cm->seq_params->bit_depth, num_planes); } #else - if (use_optimized_scaler) { + if (use_optimized_scaler && has_optimized_scaler) { av1_resize_and_extend_frame(unscaled, scaled, filter, phase, num_planes); } else { av1_resize_and_extend_frame_nonnormative(
diff --git a/av1/common/resize.h b/av1/common/resize.h index 75abe62..9bc23b3 100644 --- a/av1/common/resize.h +++ b/av1/common/resize.h
@@ -105,6 +105,32 @@
   return !(cm->width == cm->superres_upscaled_width);
 }
 
+// Returns true when an optimized scaler exists for resizing a frame of
+// src_width x src_height to dst_width x dst_height.
+//
+// There are SIMD optimizations for 1/4, 1/2 and 3/4 downscaling.
+// SSSE3 also has optimizations for 2x upscaling.
+// Use non normative scalers for other scaling ratios.
+//
+// Width and height must both match the same ratio exactly to qualify.
+static INLINE bool av1_has_optimized_scaler(const int src_width,
+                                            const int src_height,
+                                            const int dst_width,
+                                            const int dst_height) {
+  bool has_optimized_scaler =
+      (dst_width * 4 == src_width && dst_height * 4 == src_height) ||
+      (dst_width * 2 == src_width && dst_height * 2 == src_height) ||
+      (dst_width * 4 == src_width * 3 && dst_height * 4 == src_height * 3);
+#if HAVE_SSSE3
+  // Fold the 2x upscaling case into the result rather than returning early,
+  // so the final return is never left unreachable when HAVE_SSSE3 is set.
+  has_optimized_scaler =
+      has_optimized_scaler ||
+      (dst_width == src_width * 2 && dst_height == src_height * 2);
+#endif
+  return has_optimized_scaler;
+}
+
 #define UPSCALE_NORMATIVE_TAPS 8
 extern const int16_t av1_resize_filter_normative[1 << RS_SUBPEL_BITS]
                                                 [UPSCALE_NORMATIVE_TAPS];
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c index fd8be7b..cebea60 100644 --- a/av1/encoder/encoder_utils.c +++ b/av1/encoder/encoder_utils.c
@@ -733,15 +733,19 @@ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); } + const bool has_optimized_scaler = av1_has_optimized_scaler( + cm->width, cm->height, new_fb->buf.y_crop_width, + new_fb->buf.y_crop_height); #if CONFIG_AV1_HIGHBITDEPTH - if (use_optimized_scaler && cm->seq_params->bit_depth == AOM_BITS_8) + if (use_optimized_scaler && has_optimized_scaler && + cm->seq_params->bit_depth == AOM_BITS_8) av1_resize_and_extend_frame(ref, &new_fb->buf, filter, phase, num_planes); else av1_resize_and_extend_frame_nonnormative( ref, &new_fb->buf, (int)cm->seq_params->bit_depth, num_planes); #else - if (use_optimized_scaler) + if (use_optimized_scaler && has_optimized_scaler) av1_resize_and_extend_frame(ref, &new_fb->buf, filter, phase, num_planes); else