smooth_pred: 1D weights array to use less memory.
As the block sizes are powers of two, the weights for block size bs can
be stored contiguously starting at offset bs without overlapping, so
they are now looked up as sm_weight_arrays + bs.
This uses 2 * MAX_BLOCK_DIM bytes of memory, instead of the earlier
NUM_BLOCK_DIMS * MAX_BLOCK_DIM bytes.
Change-Id: I55bcedc188b8ed7def719c4d002c1fe2ec5e1b7f
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index f42adbb..ec8bbbb 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -208,33 +208,30 @@
#if CONFIG_TX64X64
// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
#define MAX_BLOCK_DIM 64
-#define NUM_BLOCK_DIMS 6 // log2(MAX_BLOCK_DIM)
#else
#define MAX_BLOCK_DIM 32
-#define NUM_BLOCK_DIMS 5
#endif // CONFIG_TX64X64
-static const uint8_t sm_weight_arrays[NUM_BLOCK_DIMS][MAX_BLOCK_DIM] = {
+static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
+ // Unused, because we always offset by bs, which is at least 2.
+ 0, 0,
// bs = 2
- { 255, 128 },
+ 255, 128,
// bs = 4
- { 255, 149, 85, 64 },
+ 255, 149, 85, 64,
// bs = 8
- { 255, 197, 146, 105, 73, 50, 37, 32 },
+ 255, 197, 146, 105, 73, 50, 37, 32,
// bs = 16
- { 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16 },
+ 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
// bs = 32
- {
- 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122,
- 111, 101, 92, 83, 74, 66, 59, 52, 45, 39, 34,
- 29, 25, 21, 17, 14, 12, 10, 9, 8, 8 },
+ 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
+ 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
#if CONFIG_TX64X64
// bs = 64
- { 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169,
- 163, 156, 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96,
- 91, 86, 82, 77, 73, 69, 65, 61, 57, 54, 50, 47, 44,
- 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15, 13,
- 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4 },
+ 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
+ 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
+ 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
+ 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
#endif // CONFIG_TX64X64
};
@@ -250,10 +247,7 @@
const uint8_t *above, const uint8_t *left) {
const uint8_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
const uint8_t right_pred = above[bs - 1]; // estimated by top-right pixel
- const int arr_index = get_msb(bs) - 1;
- assert(arr_index >= 0);
- assert(arr_index < NUM_BLOCK_DIMS);
- const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+ const uint8_t *const sm_weights = sm_weight_arrays + bs;
// scale = 2 * 2^sm_weight_log2_scale
const int log2_scale = 1 + sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -282,10 +276,7 @@
const uint8_t *above,
const uint8_t *left) {
const uint8_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
- const int arr_index = get_msb(bs) - 1;
- assert(arr_index >= 0);
- assert(arr_index < NUM_BLOCK_DIMS);
- const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+ const uint8_t *const sm_weights = sm_weight_arrays + bs;
// scale = 2^sm_weight_log2_scale
const int log2_scale = sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -313,10 +304,7 @@
const uint8_t *above,
const uint8_t *left) {
const uint8_t right_pred = above[bs - 1]; // estimated by top-right pixel
- const int arr_index = get_msb(bs) - 1;
- assert(arr_index >= 0);
- assert(arr_index < NUM_BLOCK_DIMS);
- const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+ const uint8_t *const sm_weights = sm_weight_arrays + bs;
// scale = 2^sm_weight_log2_scale
const int log2_scale = sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -807,10 +795,7 @@
const uint16_t *left, int bd) {
const uint16_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
const uint16_t right_pred = above[bs - 1]; // estimated by top-right pixel
- const int arr_index = get_msb(bs) - 1;
- assert(arr_index >= 0);
- assert(arr_index < NUM_BLOCK_DIMS);
- const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+ const uint8_t *const sm_weights = sm_weight_arrays + bs;
// scale = 2 * 2^sm_weight_log2_scale
const int log2_scale = 1 + sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -839,10 +824,7 @@
int bs, const uint16_t *above,
const uint16_t *left, int bd) {
const uint16_t below_pred = left[bs - 1]; // estimated by bottom-left pixel
- const int arr_index = get_msb(bs) - 1;
- assert(arr_index >= 0);
- assert(arr_index < NUM_BLOCK_DIMS);
- const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+ const uint8_t *const sm_weights = sm_weight_arrays + bs;
// scale = 2^sm_weight_log2_scale
const int log2_scale = sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -870,10 +852,7 @@
int bs, const uint16_t *above,
const uint16_t *left, int bd) {
const uint16_t right_pred = above[bs - 1]; // estimated by top-right pixel
- const int arr_index = get_msb(bs) - 1;
- assert(arr_index >= 0);
- assert(arr_index < NUM_BLOCK_DIMS);
- const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+ const uint8_t *const sm_weights = sm_weight_arrays + bs;
// scale = 2^sm_weight_log2_scale
const int log2_scale = sm_weight_log2_scale;
const uint16_t scale = (1 << sm_weight_log2_scale);