SMOOTH_PRED: Use 12-bit multiplications instead of 18-bit.
Compression performance is roughly neutral:
AWCY:
-----
               High Latency  Low Latency
All Keyframes      0.00         0.00
Video overall      0.01        -0.01
Google sets:
------------
- All Keyframes:
lowres -0.001
midres 0.000
hdres 0.001
- Video overall:
lowres 0.019
midres 0.000
hdres -0.013
Change-Id: I89be2739203bf3e2848e4ba7ae2988c625f54513
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index 50b6a08..447d72a 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -260,13 +260,7 @@
}
}
-// Weights are quadratic from 'bs' to '1', scaled by 2^12.
-// TODO(urvang): All weights can be at the same scale: going from '1' to '1/bs'
-// instead (still scaled by 2^12 or more).
-// Rationale: Given that max block dimension is 64 (=2^6), and max pixel value
-// is below 2^12 (for both normal and highbitdepth), power of (31 - 6 - 12 - 1)
-// = 12 is chosen so that all weighted sums in smooth_predictor() remain within
-// 2^31 (unsigned integer) range.
+// Weights are quadratic from '1' to '1 / block_size', scaled by 2^12.
static const int sm_weight_log2_scale = 12;
#if CONFIG_TX64X64
@@ -275,29 +269,26 @@
static const uint32_t sm_weight_arrays[5][32] = {
#endif // CONFIG_TX64X64
// bs = 2
- { 8192, 4096 },
+ { 4096, 2048 },
// bs = 4
- { 16384, 9557, 5461, 4096 },
+ { 4096, 2389, 1365, 1024 },
// bs = 8
- { 32768, 25161, 18725, 13458, 9362, 6437, 4681, 4096 },
+ { 4096, 3145, 2341, 1682, 1170, 805, 585, 512 },
// bs = 16
- { 65536, 57617, 50244, 43418, 37137, 31403, 26214, 21572, 17476, 13926, 10923,
- 8465, 6554, 5188, 4369, 4096 },
+ { 4096, 3601, 3140, 2714, 2321, 1963, 1638, 1348, 1092, 870, 683, 529, 410,
+ 324, 273, 256 },
// bs = 32
- { 131072, 123012, 115217, 107685, 100418, 93415, 86677, 80202,
- 73992, 68046, 62365, 56948, 51795, 46906, 42281, 37921,
- 33825, 29993, 26426, 23123, 20084, 17309, 14798, 12552,
- 10570, 8853, 7399, 6210, 5285, 4625, 4228, 4096 },
+ { 4096, 3844, 3601, 3365, 3138, 2919, 2709, 2506, 2312, 2126, 1949,
+ 1780, 1619, 1466, 1321, 1185, 1057, 937, 826, 723, 628, 541,
+ 462, 392, 330, 277, 231, 194, 165, 145, 132, 128 },
#if CONFIG_TX64X64
// bs = 64
- { 262144, 254017, 246020, 238153, 230416, 222809, 215333, 207986,
- 200769, 193682, 186726, 179899, 173202, 166636, 160199, 153893,
- 147716, 141670, 135753, 129967, 124310, 118784, 113388, 108121,
- 102985, 97979, 93103, 88357, 83740, 79254, 74898, 70672,
- 66576, 62610, 58774, 55068, 51493, 48047, 44731, 41545,
- 38489, 35564, 32768, 30102, 27567, 25161, 22886, 20740,
- 18725, 16839, 15084, 13458, 11963, 10598, 9362, 8257,
- 7282, 6437, 5721, 5136, 4681, 4356, 4161, 4096 },
+ { 4096, 3969, 3844, 3721, 3600, 3481, 3365, 3250, 3137, 3026, 2918,
+ 2811, 2706, 2604, 2503, 2405, 2308, 2214, 2121, 2031, 1942, 1856,
+ 1772, 1689, 1609, 1531, 1455, 1381, 1308, 1238, 1170, 1104, 1040,
+ 978, 918, 860, 805, 751, 699, 649, 601, 556, 512, 470,
+ 431, 393, 358, 324, 293, 263, 236, 210, 187, 166, 146,
+ 129, 114, 101, 89, 80, 73, 68, 65, 64 },
#endif // CONFIG_TX64X64
};
@@ -307,24 +298,23 @@
const uint8_t *above, const uint8_t *left) {
const uint8_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
const uint8_t right_pred = above[bs - 1]; // estimated by top-right pixel
- const int log2_bs = (int)lround(log2(bs));
- const int arr_index = log2_bs - 1;
+ const int arr_index = (int)lround(log2(bs)) - 1;
const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
- // scale = 2 * bs * 2^sm_weight_log2_scale
- const int log2_scale = 1 + log2_bs + sm_weight_log2_scale;
+ // scale = 2 * 2^sm_weight_log2_scale
+ const int log2_scale = 1 + sm_weight_log2_scale;
assert(log2_scale + 8 < 8 * 31); // sanity check: no overflow.
- const uint32_t scaled_bs = sm_weights[0];
- assert((int)scaled_bs == (bs << sm_weight_log2_scale));
+ const uint32_t scale = sm_weights[0];
+ assert((int)scale == (1 << sm_weight_log2_scale));
int r;
for (r = 0; r < bs; ++r) {
int c;
for (c = 0; c < bs; ++c) {
const uint8_t pixels[] = { above[c], below_pred, left[r], right_pred };
- const uint32_t weights[] = { sm_weights[r], scaled_bs - sm_weights[r],
- sm_weights[c], scaled_bs - sm_weights[c] };
+ const uint32_t weights[] = { sm_weights[r], scale - sm_weights[r],
+ sm_weights[c], scale - sm_weights[c] };
uint32_t this_pred = 0;
int i;
- assert(scaled_bs >= sm_weights[r] && scaled_bs >= sm_weights[c]);
+ assert(scale >= sm_weights[r] && scale >= sm_weights[c]);
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
@@ -1042,24 +1032,23 @@
const uint16_t *left, int bd) {
const uint16_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
const uint16_t right_pred = above[bs - 1]; // estimated by top-right pixel
- const int log2_bs = (int)lround(log2(bs));
- const int arr_index = log2_bs - 1;
+ const int arr_index = (int)lround(log2(bs)) - 1;
const uint32_t *const sm_weights = sm_weight_arrays[arr_index];
- // scale = 2 * bs * 2^sm_weight_log2_scale
- const int log2_scale = 1 + log2_bs + sm_weight_log2_scale;
+ // scale = 2 * 2^sm_weight_log2_scale
+ const int log2_scale = 1 + sm_weight_log2_scale;
assert(log2_scale + 8 < 8 * 31); // sanity check: no overflow.
- const uint32_t scaled_bs = sm_weights[0];
- assert((int)scaled_bs == (bs << sm_weight_log2_scale));
+ const uint32_t scale = sm_weights[0];
+ assert((int)scale == (1 << sm_weight_log2_scale));
int r;
for (r = 0; r < bs; ++r) {
int c;
for (c = 0; c < bs; ++c) {
const uint16_t pixels[] = { above[c], below_pred, left[r], right_pred };
- const uint32_t weights[] = { sm_weights[r], scaled_bs - sm_weights[r],
- sm_weights[c], scaled_bs - sm_weights[c] };
+ const uint32_t weights[] = { sm_weights[r], scale - sm_weights[r],
+ sm_weights[c], scale - sm_weights[c] };
uint32_t this_pred = 0;
int i;
- assert(scaled_bs >= sm_weights[r] && scaled_bs >= sm_weights[c]);
+ assert(scale >= sm_weights[r] && scale >= sm_weights[c]);
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
diff --git a/test/test_intra_pred_speed.cc b/test/test_intra_pred_speed.cc
index 5c8c554..5046bc9 100644
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -179,7 +179,7 @@
"f162b51ed618d28b936974cff4391da5",
#if CONFIG_ALT_INTRA
"297e8fbb5d33c29b12b228fa9d7c40a4",
- "7177dd1ae3b49441f997d439a5bd451a"
+ "a08d5b7e104c5fc2b203789ee5f725a7"
#else
"9e1370c6d42e08d357d9612c93a71cfc",
#endif // CONFIG_ALT_INTRA