smooth_pred: 1D weights array to use less memory.

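As the block sizes are powers of two, the weights for every block size
can be packed back to back in one flat array, with the bs weights for
block size bs starting at offset bs. We can therefore index into the
weights array as sm_weight_arrays + bs now.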

This uses 2 * MAX_BLOCK_DIM bytes of memory, instead of the
NUM_BLOCK_DIMS * MAX_BLOCK_DIM bytes used earlier.

Change-Id: I55bcedc188b8ed7def719c4d002c1fe2ec5e1b7f
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index f42adbb..ec8bbbb 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -208,33 +208,30 @@
 #if CONFIG_TX64X64
 // max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
 #define MAX_BLOCK_DIM 64
-#define NUM_BLOCK_DIMS 6  // log2(MAX_BLOCK_DIM)
 #else
 #define MAX_BLOCK_DIM 32
-#define NUM_BLOCK_DIMS 5
 #endif  // CONFIG_TX64X64
 
-static const uint8_t sm_weight_arrays[NUM_BLOCK_DIMS][MAX_BLOCK_DIM] = {
+static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
+  // Unused, because we always offset by bs, which is at least 2.
+  0, 0,
   // bs = 2
-  { 255, 128 },
+  255, 128,
   // bs = 4
-  { 255, 149, 85, 64 },
+  255, 149, 85, 64,
   // bs = 8
-  { 255, 197, 146, 105, 73, 50, 37, 32 },
+  255, 197, 146, 105, 73, 50, 37, 32,
   // bs = 16
-  { 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16 },
+  255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
   // bs = 32
-  {
-      255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122,
-      111, 101, 92,  83,  74,  66,  59,  52,  45,  39,  34,
-      29,  25,  21,  17,  14,  12,  10,  9,   8,   8 },
+  255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
+  66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
 #if CONFIG_TX64X64
   // bs = 64
-  { 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169,
-    163, 156, 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96,
-    91,  86,  82,  77,  73,  69,  65,  61,  57,  54,  50,  47,  44,
-    41,  38,  35,  32,  29,  27,  25,  22,  20,  18,  16,  15,  13,
-    12,  10,  9,   8,   7,   6,   6,   5,   5,   4,   4,   4 },
+  255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
+  150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
+  65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
+  13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
 #endif  // CONFIG_TX64X64
 };
 
@@ -250,10 +247,7 @@
                                     const uint8_t *above, const uint8_t *left) {
   const uint8_t below_pred = left[bs - 1];   // estimated by bottom-left pixel
   const uint8_t right_pred = above[bs - 1];  // estimated by top-right pixel
-  const int arr_index = get_msb(bs) - 1;
-  assert(arr_index >= 0);
-  assert(arr_index < NUM_BLOCK_DIMS);
-  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+  const uint8_t *const sm_weights = sm_weight_arrays + bs;
   // scale = 2 * 2^sm_weight_log2_scale
   const int log2_scale = 1 + sm_weight_log2_scale;
   const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -282,10 +276,7 @@
                                       const uint8_t *above,
                                       const uint8_t *left) {
   const uint8_t below_pred = left[bs - 1];  // estimated by bottom-left pixel
-  const int arr_index = get_msb(bs) - 1;
-  assert(arr_index >= 0);
-  assert(arr_index < NUM_BLOCK_DIMS);
-  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+  const uint8_t *const sm_weights = sm_weight_arrays + bs;
   // scale = 2^sm_weight_log2_scale
   const int log2_scale = sm_weight_log2_scale;
   const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -313,10 +304,7 @@
                                       const uint8_t *above,
                                       const uint8_t *left) {
   const uint8_t right_pred = above[bs - 1];  // estimated by top-right pixel
-  const int arr_index = get_msb(bs) - 1;
-  assert(arr_index >= 0);
-  assert(arr_index < NUM_BLOCK_DIMS);
-  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+  const uint8_t *const sm_weights = sm_weight_arrays + bs;
   // scale = 2^sm_weight_log2_scale
   const int log2_scale = sm_weight_log2_scale;
   const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -807,10 +795,7 @@
                                            const uint16_t *left, int bd) {
   const uint16_t below_pred = left[bs - 1];   // estimated by bottom-left pixel
   const uint16_t right_pred = above[bs - 1];  // estimated by top-right pixel
-  const int arr_index = get_msb(bs) - 1;
-  assert(arr_index >= 0);
-  assert(arr_index < NUM_BLOCK_DIMS);
-  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+  const uint8_t *const sm_weights = sm_weight_arrays + bs;
   // scale = 2 * 2^sm_weight_log2_scale
   const int log2_scale = 1 + sm_weight_log2_scale;
   const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -839,10 +824,7 @@
                                              int bs, const uint16_t *above,
                                              const uint16_t *left, int bd) {
   const uint16_t below_pred = left[bs - 1];  // estimated by bottom-left pixel
-  const int arr_index = get_msb(bs) - 1;
-  assert(arr_index >= 0);
-  assert(arr_index < NUM_BLOCK_DIMS);
-  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+  const uint8_t *const sm_weights = sm_weight_arrays + bs;
   // scale = 2^sm_weight_log2_scale
   const int log2_scale = sm_weight_log2_scale;
   const uint16_t scale = (1 << sm_weight_log2_scale);
@@ -870,10 +852,7 @@
                                              int bs, const uint16_t *above,
                                              const uint16_t *left, int bd) {
   const uint16_t right_pred = above[bs - 1];  // estimated by top-right pixel
-  const int arr_index = get_msb(bs) - 1;
-  assert(arr_index >= 0);
-  assert(arr_index < NUM_BLOCK_DIMS);
-  const uint8_t *const sm_weights = sm_weight_arrays[arr_index];
+  const uint8_t *const sm_weights = sm_weight_arrays + bs;
   // scale = 2^sm_weight_log2_scale
   const int log2_scale = sm_weight_log2_scale;
   const uint16_t scale = (1 << sm_weight_log2_scale);