Limit intermediate precision bits correctly The intermediate precision must be correctly limited since the current implementation stores the intermediate filtered data in signed 16-bit buffers. Therefore, for n-bit sources, the intermediate extra precision bits must be at most 15-n. This is needed now that the intermediate extra precision is a user-configurable parameter. Change-Id: Ie87f6aaaa66af335b945765d441125ab494dfaa1

commit: ed1cbc084f78a5dbc50e400ad56b9b351d26cf1e [log] [tgz]
author: Debargha Mukherjee <debargha@google.com> Mon Jan 04 21:47:33 2021 -0800
committer: Debargha Mukherjee <debargha@google.com> Tue Jan 05 12:18:47 2021 -0800
tree: 66d48fd3a20c9b627386db52fbfe05885193f079
parent: a09fe322840982ac9a4e26ee6a2b22e55c77ec1b [diff]
diff --git a/tools/lanczos/lanczos_README.txt b/tools/lanczos/lanczos_README.txt
index b37dfdb..75b2da6 100644
--- a/tools/lanczos/lanczos_README.txt
+++ b/tools/lanczos/lanczos_README.txt

@@ -52,6 +52,7 @@
                                  [default: 14]
           -ieb:<n>        - providing intermediate extra bits of
                             prec between horz and vert filtering
+                            clamped to maximum of (15 - bitdepth)
                                  [default: 2]
           -ext:<ext_type> - providing the extension type
                <ext_type> is one of:

diff --git a/tools/lanczos/lanczos_resample.c b/tools/lanczos/lanczos_resample.c
index 6f321ef..c529fdf 100644
--- a/tools/lanczos/lanczos_resample.c
+++ b/tools/lanczos/lanczos_resample.c

@@ -26,6 +26,9 @@
   (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
                  : ROUND_POWER_OF_TWO((value), (n)))
 
+#define MAX(a, b) ((a) < (b) ? (b) : (a))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
 #ifndef M_PI
 #define M_PI (3.14159265358979323846)
 #endif
@@ -343,11 +346,13 @@
     for (int j = -tapsby2 + 1; j <= tapsby2; ++j) {
       sum += (int)rf->filter[p][j + tapsby2 - 1] * (int)xext[j];
     }
-    y[i] = ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
+    sum = ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
     if (clip) {
-      y[i] = (int16_t)clip->issigned ? doclip(y[i], -(1 << (clip->bits - 1)),
+      y[i] = (int16_t)clip->issigned ? doclip(sum, -(1 << (clip->bits - 1)),
                                               (1 << (clip->bits - 1)) - 1)
-                                     : doclip(y[i], 0, (1 << clip->bits) - 1);
+                                     : doclip(sum, 0, (1 << clip->bits) - 1);
+    } else {
+      y[i] = (int16_t)sum;
     }
     xext += rf->steps[p];
   }
@@ -472,6 +477,8 @@
   int16_t *tmparrv = tmparr_ + outheight + rfv->length / 2;
   int16_t *tmparro = tmparr_;
   int tmpstride = outwidth;
+  // intermediate data is stored in 16 bit buffers, so limit int_extra_bits
+  int_extra_bits = MIN(int_extra_bits, 15 - clip->bits);
   const int downshifth = rfh->filter_bits - int_extra_bits;
   const int downshiftv = rfh->filter_bits + int_extra_bits;
   for (int i = 0; i < inheight; ++i) {
@@ -531,11 +538,13 @@
     for (int j = -tapsby2 + 1; j <= tapsby2; ++j) {
       sum += (int)rf->filter[p][j + tapsby2 - 1] * (int)xext[j];
     }
-    y[i] = (int16_t)ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
+    sum = ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
     if (clip) {
-      y[i] = (int16_t)clip->issigned ? doclip(y[i], -(1 << (clip->bits - 1)),
+      y[i] = (int16_t)clip->issigned ? doclip(sum, -(1 << (clip->bits - 1)),
                                               (1 << (clip->bits - 1)) - 1)
-                                     : doclip(y[i], 0, (1 << clip->bits) - 1);
+                                     : doclip(sum, 0, (1 << clip->bits) - 1);
+    } else {
+      y[i] = (int16_t)sum;
     }
     xext += rf->steps[p];
   }
@@ -555,11 +564,13 @@
     for (int j = -tapsby2 + 1; j <= tapsby2; ++j) {
       sum += (int)rf->filter[p][j + tapsby2 - 1] * (int)xext[j];
     }
-    y[i] = (uint8_t)ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
+    sum = ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
     if (clip) {
-      y[i] = (uint8_t)clip->issigned ? doclip(y[i], -(1 << (clip->bits - 1)),
+      y[i] = (uint8_t)clip->issigned ? doclip(sum, -(1 << (clip->bits - 1)),
                                               (1 << (clip->bits - 1)) - 1)
-                                     : doclip(y[i], 0, (1 << clip->bits) - 1);
+                                     : doclip(sum, 0, (1 << clip->bits) - 1);
+    } else {
+      y[i] = (uint8_t)sum;
     }
     xext += rf->steps[p];
   }
@@ -706,6 +717,8 @@
   int16_t *tmparrv = tmparr_ + outheight + rfv->length / 2;
   int16_t *tmparro = tmparr_;
   int tmpstride = outwidth;
+  // intermediate data is stored in 16 bit buffers, so limit int_extra_bits
+  int_extra_bits = MIN(int_extra_bits, 15 - clip->bits);
   const int downshifth = rfh->filter_bits - int_extra_bits;
   const int downshiftv = rfh->filter_bits + int_extra_bits;
   for (int i = 0; i < inheight; ++i) {

diff --git a/tools/lanczos/lanczos_resample_filter.c b/tools/lanczos/lanczos_resample_filter.c
index e4d246f..b516baf 100644
--- a/tools/lanczos/lanczos_resample_filter.c
+++ b/tools/lanczos/lanczos_resample_filter.c

@@ -42,6 +42,7 @@
   printf("                                 [default: 14]\n");
   printf("          -ieb:<n>        - providing intermediate extra bits of\n");
   printf("                            prec between horz and vert filtering\n");
+  printf("                            clamped to maximum of (15 - bitdepth)\n");
   printf("                                 [default: 2]\n");
   printf("          -ext:<ext_type> - providing the extension type\n");
   printf("               <ext_type> is one of:\n");

diff --git a/tools/lanczos/lanczos_resample_y4m.c b/tools/lanczos/lanczos_resample_y4m.c
index 852d932..814e00b 100644
--- a/tools/lanczos/lanczos_resample_y4m.c
+++ b/tools/lanczos/lanczos_resample_y4m.c

@@ -56,6 +56,7 @@
   printf("                                 [default: 14]\n");
   printf("          -ieb:<n>        - providing intermediate extra bits of\n");
   printf("                            prec between horz and vert filtering\n");
+  printf("                            clamped to maximum of (15 - bitdepth)\n");
   printf("                                 [default: 2]\n");
   printf("          -ext:<ext_type> - providing the extension type\n");
   printf("               <ext_type> is one of:\n");

diff --git a/tools/lanczos/lanczos_resample_yuv.c b/tools/lanczos/lanczos_resample_yuv.c
index ea1e17c..422c397 100644
--- a/tools/lanczos/lanczos_resample_yuv.c
+++ b/tools/lanczos/lanczos_resample_yuv.c

@@ -58,6 +58,7 @@
   printf("                                 [default: 14]\n");
   printf("          -ieb:<n>        - providing intermediate extra bits of\n");
   printf("                            prec between horz and vert filtering\n");
+  printf("                            clamped to maximum of (15 - bitdepth)\n");
   printf("                                 [default: 2]\n");
   printf("          -ext:<ext_type> - providing the extension type\n");
   printf("               <ext_type> is one of:\n");
commit	ed1cbc084f78a5dbc50e400ad56b9b351d26cf1e	[log] [tgz]
author	Debargha Mukherjee <debargha@google.com>	Mon Jan 04 21:47:33 2021 -0800
committer	Debargha Mukherjee <debargha@google.com>	Tue Jan 05 12:18:47 2021 -0800
tree	66d48fd3a20c9b627386db52fbfe05885193f079
parent	a09fe322840982ac9a4e26ee6a2b22e55c77ec1b [diff]