Remove unnecessary clamping in warp filter

The affine warp filter still has logic to clamp the intermediate bit width to 16 bits. This is unnecessary, as this condition is already included in the calculation of conv_params->round_0.

Therefore, replace this clamping with an assertion that the value is already in range.

Change-Id: I672f98ae45a2d3a28b952e28719fa74033c34e56

commit: 03976dc7bb69c6a0c89a2bcf92b7666ac7a10a83 [log] [tgz]
author: Rachel Barker <rachelbarker@google.com> Tue Nov 14 21:34:53 2023 +0000
committer: Rachel Barker <rachelbarker@google.com> Wed Nov 15 18:21:42 2023 +0000
tree: d05cec10726e96d7ec0ca674cad30aa2050875cd
parent: 0bc6efc1409aa33c99fc537acab3f35879291896 [diff]
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index f376e16..4282b92 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c

@@ -291,9 +291,7 @@
                               ConvolveParams *conv_params, int16_t alpha,
                               int16_t beta, int16_t gamma, int16_t delta) {
   int32_t tmp[15 * 8];
-  const int reduce_bits_horiz =
-      conv_params->round_0 +
-      AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+  const int reduce_bits_horiz = conv_params->round_0;
   const int reduce_bits_vert = conv_params->is_compound
                                    ? conv_params->round_1
                                    : 2 * FILTER_BITS - reduce_bits_horiz;
@@ -306,6 +304,10 @@
   (void)max_bits_horiz;
   assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
 
+  // Check that, even with 12-bit input, the intermediate values will fit
+  // into an unsigned 16-bit intermediate array.
+  assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
+
   for (int i = p_row; i < p_row + p_height; i += 8) {
     for (int j = p_col; j < p_col + p_width; j += 8) {
       // Calculate the center of this 8x8 block,

diff --git a/av1/common/x86/highbd_warp_affine_avx2.c b/av1/common/x86/highbd_warp_affine_avx2.c
index 7f6aceb..75108b4 100644
--- a/av1/common/x86/highbd_warp_affine_avx2.c
+++ b/av1/common/x86/highbd_warp_affine_avx2.c

@@ -22,9 +22,7 @@
                                  ConvolveParams *conv_params, int16_t alpha,
                                  int16_t beta, int16_t gamma, int16_t delta) {
   __m256i tmp[15];
-  const int reduce_bits_horiz =
-      conv_params->round_0 +
-      AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+  const int reduce_bits_horiz = conv_params->round_0;
   const int reduce_bits_vert = conv_params->is_compound
                                    ? conv_params->round_1
                                    : 2 * FILTER_BITS - reduce_bits_horiz;
@@ -37,6 +35,10 @@
   (void)max_bits_horiz;
   assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
 
+  // Check that, even with 12-bit input, the intermediate values will fit
+  // into an unsigned 16-bit intermediate array.
+  assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
+
   const __m256i clip_pixel =
       _mm256_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
   const __m128i reduce_bits_vert_shift = _mm_cvtsi32_si128(reduce_bits_vert);

diff --git a/av1/common/x86/highbd_warp_plane_sse4.c b/av1/common/x86/highbd_warp_plane_sse4.c
index 9df0ddc..96fb4cf 100644
--- a/av1/common/x86/highbd_warp_plane_sse4.c
+++ b/av1/common/x86/highbd_warp_plane_sse4.c

@@ -302,9 +302,7 @@
                                    int16_t beta, int16_t gamma, int16_t delta) {
   __m128i tmp[15];
   int i, j, k;
-  const int reduce_bits_horiz =
-      conv_params->round_0 +
-      AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
+  const int reduce_bits_horiz = conv_params->round_0;
   const int reduce_bits_vert = conv_params->is_compound
                                    ? conv_params->round_1
                                    : 2 * FILTER_BITS - reduce_bits_horiz;
@@ -313,6 +311,10 @@
   assert(!(bd == 12 && reduce_bits_horiz < 5));
   assert(IMPLIES(conv_params->do_average, conv_params->is_compound));
 
+  // Check that, even with 12-bit input, the intermediate values will fit
+  // into an unsigned 16-bit intermediate array.
+  assert(bd + FILTER_BITS + 2 - conv_params->round_0 <= 16);
+
   const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
   const __m128i clip_pixel =
       _mm_set1_epi16(bd == 10 ? 1023 : (bd == 12 ? 4095 : 255));
commit	03976dc7bb69c6a0c89a2bcf92b7666ac7a10a83	[log] [tgz]
author	Rachel Barker <rachelbarker@google.com>	Tue Nov 14 21:34:53 2023 +0000
committer	Rachel Barker <rachelbarker@google.com>	Wed Nov 15 18:21:42 2023 +0000
tree	d05cec10726e96d7ec0ca674cad30aa2050875cd
parent	0bc6efc1409aa33c99fc537acab3f35879291896 [diff]