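ALT_INTRA: Integerize the weights for SMOOTH_PRED.
Replace the floating-point weight tables with integer weights scaled by 2^12,
and replace the floating-point division with a rounding right shift.
The change in BD-rate is insignificant.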
Change-Id: Id1aa798393fd4c4c174dfcb9a8315828b531996f
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index 1b0bca4..50b6a08 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -9,6 +9,7 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#include <assert.h>
#include <math.h>
#include "./aom_config.h"
@@ -259,61 +260,75 @@
}
}
-// Weights are quadratic from 'bs' to '1'.
-// Scale is same as 'bs'.
-// TODO(urvang): Integerize the weights at a suitable precision.
+// Weights are quadratic from 'bs' to '1', scaled by 2^12.
+// TODO(urvang): All weights can be at the same scale: going from '1' to '1/bs'
+// instead (still scaled by 2^12 or more).
+// Rationale: The max block dimension is 64 (= 2^6), pixel values are below
+// 2^12 (for both normal and high bitdepth), and each prediction sums two
+// weight pairs that each total 'bs' (a factor of 2^1). A weight scale of
+// 2^(31 - 6 - 12 - 1) = 2^12 keeps every weighted sum below 2^31 (fits uint32_t).
+static const int sm_weight_log2_scale = 12;
+
#if CONFIG_TX64X64
-static const double sm_weight_arrays[6][64] = {
+static const uint32_t sm_weight_arrays[6][64] = {
#else
-static const double sm_weight_arrays[5][32] = {
+static const uint32_t sm_weight_arrays[5][32] = {
#endif // CONFIG_TX64X64
// bs = 2
- { 2, 1 },
+ { 8192, 4096 },
// bs = 4
- { 4, 2.33333, 1.33333, 1 },
+ { 16384, 9557, 5461, 4096 },
// bs = 8
- { 8, 6.14286, 4.57143, 3.28571, 2.28571, 1.57143, 1.14286, 1 },
+ { 32768, 25161, 18725, 13458, 9362, 6437, 4681, 4096 },
// bs = 16
- { 16, 14.0667, 12.2667, 10.6, 9.06667, 7.66667, 6.4, 5.26667, 4.26667, 3.4,
- 2.66667, 2.06667, 1.6, 1.26667, 1.06667, 1 },
+ { 65536, 57617, 50244, 43418, 37137, 31403, 26214, 21572, 17476, 13926, 10923,
+ 8465, 6554, 5188, 4369, 4096 },
// bs = 32
- { 32, 30.0323, 28.129, 26.2903, 24.5161, 22.8065, 21.1613, 19.5806,
- 18.0645, 16.6129, 15.2258, 13.9032, 12.6452, 11.4516, 10.3226, 9.25806,
- 8.25806, 7.32258, 6.45161, 5.64516, 4.90323, 4.22581, 3.6129, 3.06452,
- 2.58065, 2.16129, 1.80645, 1.51613, 1.29032, 1.12903, 1.03226, 1 },
+ { 131072, 123012, 115217, 107685, 100418, 93415, 86677, 80202,
+ 73992, 68046, 62365, 56948, 51795, 46906, 42281, 37921,
+ 33825, 29993, 26426, 23123, 20084, 17309, 14798, 12552,
+ 10570, 8853, 7399, 6210, 5285, 4625, 4228, 4096 },
#if CONFIG_TX64X64
// bs = 64
- { 64, 62.0159, 60.0635, 58.1429, 56.254, 54.3968, 52.5714, 50.7778,
- 49.0159, 47.2857, 45.5873, 43.9206, 42.2857, 40.6825, 39.1111, 37.5714,
- 36.0635, 34.5873, 33.1429, 31.7302, 30.3492, 29, 27.6825, 26.3968,
- 25.1429, 23.9206, 22.7302, 21.5714, 20.4444, 19.3492, 18.2857, 17.254,
- 16.254, 15.2857, 14.3492, 13.4444, 12.5714, 11.7302, 10.9206, 10.1429,
- 9.39683, 8.68254, 8, 7.34921, 6.73016, 6.14286, 5.5873, 5.06349,
- 4.57143, 4.11111, 3.68254, 3.28571, 2.92063, 2.5873, 2.28571, 2.01587,
- 1.77778, 1.57143, 1.39683, 1.25397, 1.14286, 1.06349, 1.01587, 1 },
+ { 262144, 254017, 246020, 238153, 230416, 222809, 215333, 207986,
+ 200769, 193682, 186726, 179899, 173202, 166636, 160199, 153893,
+ 147716, 141670, 135753, 129967, 124310, 118784, 113388, 108121,
+ 102985, 97979, 93103, 88357, 83740, 79254, 74898, 70672,
+ 66576, 62610, 58774, 55068, 51493, 48047, 44731, 41545,
+ 38489, 35564, 32768, 30102, 27567, 25161, 22886, 20740,
+ 18725, 16839, 15084, 13458, 11963, 10598, 9362, 8257,
+ 7282, 6437, 5721, 5136, 4681, 4356, 4161, 4096 },
#endif // CONFIG_TX64X64
};
+#define divide_round(value, bits) (((value) + (1 << ((bits)-1))) >> (bits))
+
static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
const uint8_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
const uint8_t right_pred = above[bs - 1]; // estimated by top-right pixel
- const int arr_index = (int)lround(log2(bs)) - 1;
- const double *const sm_weights = sm_weight_arrays[arr_index];
- const double scale = 2.0 * bs;
+ const int log2_bs = (int)lround(log2(bs));
+ const int arr_index = log2_bs - 1;
+ const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
+ // scale = 2 * bs * 2^sm_weight_log2_scale
+ const int log2_scale = 1 + log2_bs + sm_weight_log2_scale;
+ assert(log2_scale + 8 < 8 * 31); // sanity check: no overflow.
+ const uint32_t scaled_bs = sm_weights[0];
+ assert((int)scaled_bs == (bs << sm_weight_log2_scale));
int r;
for (r = 0; r < bs; ++r) {
int c;
for (c = 0; c < bs; ++c) {
- const int pixels[] = { above[c], below_pred, left[r], right_pred };
- const double weights[] = { sm_weights[r], bs - sm_weights[r],
- sm_weights[c], bs - sm_weights[c] };
- double this_pred = 0;
+ const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred };
+ const uint32_t weights[] = { sm_weights[r], scaled_bs - sm_weights[r],
+ sm_weights[c], scaled_bs - sm_weights[c] };
+ uint32_t this_pred = 0;
int i;
+ assert(scaled_bs >= sm_weights[r] && scaled_bs >= sm_weights[c]);
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
- dst[c] = clip_pixel(lround(this_pred / scale));
+ dst[c] = clip_pixel(divide_round(this_pred, log2_scale));
}
dst += stride;
}
@@ -1027,22 +1042,28 @@
const uint16_t *left, int bd) {
const uint16_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
const uint16_t right_pred = above[bs - 1]; // estimated by top-right pixel
- const int arr_index = (int)lround(log2(bs)) - 1;
- const double *const sm_weights = sm_weight_arrays[arr_index];
- const double scale = 2.0 * bs;
+ const int log2_bs = (int)lround(log2(bs));
+ const int arr_index = log2_bs - 1;
+ const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
+ // scale = 2 * bs * 2^sm_weight_log2_scale
+ const int log2_scale = 1 + log2_bs + sm_weight_log2_scale;
+ assert(log2_scale + 8 < 8 * 31); // sanity check: no overflow.
+ const uint32_t scaled_bs = sm_weights[0];
+ assert((int)scaled_bs == (bs << sm_weight_log2_scale));
int r;
for (r = 0; r < bs; ++r) {
int c;
for (c = 0; c < bs; ++c) {
- const int pixels[] = { above[c], below_pred, left[r], right_pred };
- const double weights[] = { sm_weights[r], bs - sm_weights[r],
- sm_weights[c], bs - sm_weights[c] };
- double this_pred = 0;
+ const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
+ const uint32_t weights[] = { sm_weights[r], scaled_bs - sm_weights[r],
+ sm_weights[c], scaled_bs - sm_weights[c] };
+ uint32_t this_pred = 0;
int i;
+ assert(scaled_bs >= sm_weights[r] && scaled_bs >= sm_weights[c]);
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
- dst[c] = clip_pixel_highbd(lround(this_pred / scale), bd);
+ dst[c] = clip_pixel_highbd(divide_round(this_pred, log2_scale), bd);
}
dst += stride;
}
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index ddaeb56..ce4b3c1 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -129,7 +129,7 @@
"95f7bfc262329a5849eda66d8f7c68ce",
#if CONFIG_ALT_INTRA
"f6ade499c626d38eb70661184b79bc57",
- "28a52163fa8bd2216e6af1ce3113af09"
+ "f9217748b7188479c2990e42d2dc1da1"
#else
"815b75c8e0d91cc1ae766dc5d3e445a3",
#endif // CONFIG_ALT_INTRA
@@ -154,7 +154,7 @@
"a8fe1c70432f09d0c20c67bdb6432c4d",
#if CONFIG_ALT_INTRA
"7adcaaa3554eb71a81fc48cb9043984b",
- "3f83cda25a2c1647e1b48803922c33df"
+ "de44142b9670ab7c85d4c318c47257e5"
#else
"b8a41aa968ec108af447af4217cba91b",
#endif // CONFIG_ALT_INTRA