ALT_INTRA: Integerize the weights for SMOOTH_PRED.

Insignificant change in BDRate.

Change-Id: Id1aa798393fd4c4c174dfcb9a8315828b531996f
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index 1b0bca4..50b6a08 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -9,6 +9,7 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
+#include <assert.h>
 #include <math.h>
 
 #include "./aom_config.h"
@@ -259,61 +260,75 @@
   }
 }
 
-// Weights are quadratic from 'bs' to '1'.
-// Scale is same as 'bs'.
-// TODO(urvang): Integerize the weights at a suitable precision.
+// Weights are quadratic from 'bs' to '1', scaled by 2^12.
+// TODO(urvang): All weights can be at the same scale: going from '1' to '1/bs'
+// instead (still scaled by 2^12 or more).
+// Rationale for the 2^12 scale: the max block dimension is 64 (= 2^6), pixel
+// values are below 2^12 (for both normal and highbitdepth), and the two weight
+// pairs sum to 2 * bs (1 more bit), so a scale of 31 - 6 - 12 - 1 = 12 bits
+// keeps every weighted sum in smooth_predictor() below 2^31 (fits uint32_t).
+static const int sm_weight_log2_scale = 12;
+
 #if CONFIG_TX64X64
-static const double sm_weight_arrays[6][64] = {
+static const uint32_t sm_weight_arrays[6][64] = {
 #else
-static const double sm_weight_arrays[5][32] = {
+static const uint32_t sm_weight_arrays[5][32] = {
 #endif  // CONFIG_TX64X64
   // bs = 2
-  { 2, 1 },
+  { 8192, 4096 },
   // bs = 4
-  { 4, 2.33333, 1.33333, 1 },
+  { 16384, 9557, 5461, 4096 },
   // bs = 8
-  { 8, 6.14286, 4.57143, 3.28571, 2.28571, 1.57143, 1.14286, 1 },
+  { 32768, 25161, 18725, 13458, 9362, 6437, 4681, 4096 },
   // bs = 16
-  { 16, 14.0667, 12.2667, 10.6, 9.06667, 7.66667, 6.4, 5.26667, 4.26667, 3.4,
-    2.66667, 2.06667, 1.6, 1.26667, 1.06667, 1 },
+  { 65536, 57617, 50244, 43418, 37137, 31403, 26214, 21572, 17476, 13926, 10923,
+    8465, 6554, 5188, 4369, 4096 },
   // bs = 32
-  { 32,      30.0323, 28.129,  26.2903, 24.5161, 22.8065, 21.1613, 19.5806,
-    18.0645, 16.6129, 15.2258, 13.9032, 12.6452, 11.4516, 10.3226, 9.25806,
-    8.25806, 7.32258, 6.45161, 5.64516, 4.90323, 4.22581, 3.6129,  3.06452,
-    2.58065, 2.16129, 1.80645, 1.51613, 1.29032, 1.12903, 1.03226, 1 },
+  { 131072, 123012, 115217, 107685, 100418, 93415, 86677, 80202,
+    73992,  68046,  62365,  56948,  51795,  46906, 42281, 37921,
+    33825,  29993,  26426,  23123,  20084,  17309, 14798, 12552,
+    10570,  8853,   7399,   6210,   5285,   4625,  4228,  4096 },
 #if CONFIG_TX64X64
   // bs = 64
-  { 64,      62.0159, 60.0635, 58.1429, 56.254,  54.3968, 52.5714, 50.7778,
-    49.0159, 47.2857, 45.5873, 43.9206, 42.2857, 40.6825, 39.1111, 37.5714,
-    36.0635, 34.5873, 33.1429, 31.7302, 30.3492, 29,      27.6825, 26.3968,
-    25.1429, 23.9206, 22.7302, 21.5714, 20.4444, 19.3492, 18.2857, 17.254,
-    16.254,  15.2857, 14.3492, 13.4444, 12.5714, 11.7302, 10.9206, 10.1429,
-    9.39683, 8.68254, 8,       7.34921, 6.73016, 6.14286, 5.5873,  5.06349,
-    4.57143, 4.11111, 3.68254, 3.28571, 2.92063, 2.5873,  2.28571, 2.01587,
-    1.77778, 1.57143, 1.39683, 1.25397, 1.14286, 1.06349, 1.01587, 1 },
+  { 262144, 254017, 246020, 238153, 230416, 222809, 215333, 207986,
+    200769, 193682, 186726, 179899, 173202, 166636, 160199, 153893,
+    147716, 141670, 135753, 129967, 124310, 118784, 113388, 108121,
+    102985, 97979,  93103,  88357,  83740,  79254,  74898,  70672,
+    66576,  62610,  58774,  55068,  51493,  48047,  44731,  41545,
+    38489,  35564,  32768,  30102,  27567,  25161,  22886,  20740,
+    18725,  16839,  15084,  13458,  11963,  10598,  9362,   8257,
+    7282,   6437,   5721,   5136,   4681,   4356,   4161,   4096 },
 #endif  // CONFIG_TX64X64
 };
 
+#define divide_round(value, bits) (((value) + (1 << ((bits)-1))) >> (bits))
+
 static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
                                     const uint8_t *above, const uint8_t *left) {
   const uint8_t below_pred = left[bs - 1];   // estimated by bottom-left pixel
   const uint8_t right_pred = above[bs - 1];  // estimated by top-right pixel
-  const int arr_index = (int)lround(log2(bs)) - 1;
-  const double *const sm_weights = sm_weight_arrays[arr_index];
-  const double scale = 2.0 * bs;
+  const int log2_bs = (int)lround(log2(bs));
+  const int arr_index = log2_bs - 1;
+  const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
+  // scale = 2 * bs * 2^sm_weight_log2_scale
+  const int log2_scale = 1 + log2_bs + sm_weight_log2_scale;
+  assert(log2_scale + 8 < 8 * 31);  // sanity check: no overflow.
+  const uint32_t scaled_bs = sm_weights[0];
+  assert((int)scaled_bs == (bs << sm_weight_log2_scale));
   int r;
   for (r = 0; r < bs; ++r) {
     int c;
     for (c = 0; c < bs; ++c) {
-      const int pixels[] = { above[c], below_pred, left[r], right_pred };
-      const double weights[] = { sm_weights[r], bs - sm_weights[r],
-                                 sm_weights[c], bs - sm_weights[c] };
-      double this_pred = 0;
+      const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred };
+      const uint32_t weights[] = { sm_weights[r], scaled_bs - sm_weights[r],
+                                   sm_weights[c], scaled_bs - sm_weights[c] };
+      uint32_t this_pred = 0;
       int i;
+      assert(scaled_bs >= sm_weights[r] && scaled_bs >= sm_weights[c]);
       for (i = 0; i < 4; ++i) {
         this_pred += weights[i] * pixels[i];
       }
-      dst[c] = clip_pixel(lround(this_pred / scale));
+      dst[c] = clip_pixel(divide_round(this_pred, log2_scale));
     }
     dst += stride;
   }
@@ -1027,22 +1042,28 @@
                                            const uint16_t *left, int bd) {
   const uint16_t below_pred = left[bs - 1];   // estimated by bottom-left pixel
   const uint16_t right_pred = above[bs - 1];  // estimated by top-right pixel
-  const int arr_index = (int)lround(log2(bs)) - 1;
-  const double *const sm_weights = sm_weight_arrays[arr_index];
-  const double scale = 2.0 * bs;
+  const int log2_bs = (int)lround(log2(bs));
+  const int arr_index = log2_bs - 1;
+  const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
+  // scale = 2 * bs * 2^sm_weight_log2_scale
+  const int log2_scale = 1 + log2_bs + sm_weight_log2_scale;
+  assert(log2_scale + 8 < 8 * 31);  // sanity check: no overflow.
+  const uint32_t scaled_bs = sm_weights[0];
+  assert((int)scaled_bs == (bs << sm_weight_log2_scale));
   int r;
   for (r = 0; r < bs; ++r) {
     int c;
     for (c = 0; c < bs; ++c) {
-      const int pixels[] = { above[c], below_pred, left[r], right_pred };
-      const double weights[] = { sm_weights[r], bs - sm_weights[r],
-                                 sm_weights[c], bs - sm_weights[c] };
-      double this_pred = 0;
+      const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
+      const uint32_t weights[] = { sm_weights[r], scaled_bs - sm_weights[r],
+                                   sm_weights[c], scaled_bs - sm_weights[c] };
+      uint32_t this_pred = 0;
       int i;
+      assert(scaled_bs >= sm_weights[r] && scaled_bs >= sm_weights[c]);
       for (i = 0; i < 4; ++i) {
         this_pred += weights[i] * pixels[i];
       }
-      dst[c] = clip_pixel_highbd(lround(this_pred / scale), bd);
+      dst[c] = clip_pixel_highbd(divide_round(this_pred, log2_scale), bd);
     }
     dst += stride;
   }
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index ddaeb56..ce4b3c1 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -129,7 +129,7 @@
     "95f7bfc262329a5849eda66d8f7c68ce",
 #if CONFIG_ALT_INTRA
     "f6ade499c626d38eb70661184b79bc57",
-    "28a52163fa8bd2216e6af1ce3113af09"
+    "f9217748b7188479c2990e42d2dc1da1"
 #else
     "815b75c8e0d91cc1ae766dc5d3e445a3",
 #endif  // CONFIG_ALT_INTRA
@@ -154,7 +154,7 @@
     "a8fe1c70432f09d0c20c67bdb6432c4d",
 #if CONFIG_ALT_INTRA
     "7adcaaa3554eb71a81fc48cb9043984b",
-    "3f83cda25a2c1647e1b48803922c33df"
+    "de44142b9670ab7c85d4c318c47257e5"
 #else
     "b8a41aa968ec108af447af4217cba91b",
 #endif  // CONFIG_ALT_INTRA