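Palette: Use inverse_color_order to find color index faster.

av1_get_palette_color_context() now keeps an inverse of color_order
while sorting and reports the current pixel's index through a new
optional color_idx output (pass NULL to skip it). The encoder callers
in rdopt.c and tokenize.c therefore no longer search color_order
linearly for every pixel. color_order changes from int * to uint8_t *.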

Cherry-picked from aomedia/master: b1c3bb5

Change-Id: Icfc16070160fd9763abb1dbf5545103e62b4b9ff
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index 20e8904..78f4ffe 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -892,13 +892,14 @@
 
 #if CONFIG_PALETTE
 int av1_get_palette_color_context(const uint8_t *color_map, int cols, int r,
-                                  int c, int n, int *color_order) {
+                                  int c, int n, uint8_t *color_order,
+                                  int *color_idx) {
   int i, j, max, max_idx, temp;
   int scores[PALETTE_MAX_SIZE + 10];
   int weights[4] = { 3, 2, 3, 2 };
   int color_ctx = 0;
   int color_neighbors[4];
-
+  int inverse_color_order[PALETTE_MAX_SIZE];
   assert(n <= PALETTE_MAX_SIZE);
 
   if (c - 1 >= 0)
@@ -918,7 +919,10 @@
   else
     color_neighbors[3] = -1;
 
-  for (i = 0; i < PALETTE_MAX_SIZE; ++i) color_order[i] = i;
+  for (i = 0; i < PALETTE_MAX_SIZE; ++i) {
+    color_order[i] = i;
+    inverse_color_order[i] = i;
+  }
   memset(scores, 0, PALETTE_MAX_SIZE * sizeof(scores[0]));
   for (i = 0; i < 4; ++i) {
     if (color_neighbors[i] >= 0) scores[color_neighbors[i]] += weights[i];
@@ -944,6 +948,8 @@
       temp = color_order[i];
       color_order[i] = color_order[max_idx];
       color_order[max_idx] = temp;
+      inverse_color_order[color_order[i]] = i;
+      inverse_color_order[color_order[max_idx]] = max_idx;
     }
   }
 
@@ -956,7 +962,9 @@
     }
 
   if (color_ctx >= PALETTE_COLOR_CONTEXTS) color_ctx = 0;
-
+  if (color_idx != NULL) {
+    *color_idx = inverse_color_order[color_map[r * cols + c]];
+  }
   return color_ctx;
 }
 #endif  // CONFIG_PALETTE
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index d0ad807..49dbbb0 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -320,7 +320,8 @@
 
 #if CONFIG_PALETTE
 int av1_get_palette_color_context(const uint8_t *color_map, int cols, int r,
-                                  int c, int n, int *color_order);
+                                  int c, int n, uint8_t *color_order,
+                                  int *color_idx);
 #endif  // CONFIG_PALETTE
 
 #ifdef __cplusplus
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 7077788..b5f26d2 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -338,15 +338,15 @@
 #if CONFIG_PALETTE
 void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane,
                                aom_reader *r) {
-  MODE_INFO *const mi = xd->mi[0];
-  MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const MODE_INFO *const mi = xd->mi[0];
+  const MB_MODE_INFO *const mbmi = &mi->mbmi;
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const int rows = (4 * num_4x4_blocks_high_lookup[bsize]) >>
                    (xd->plane[plane != 0].subsampling_y);
   const int cols = (4 * num_4x4_blocks_wide_lookup[bsize]) >>
                    (xd->plane[plane != 0].subsampling_x);
-  int color_idx, color_ctx, color_order[PALETTE_MAX_SIZE];
-  int n = mbmi->palette_mode_info.palette_size[plane != 0];
+  uint8_t color_order[PALETTE_MAX_SIZE];
+  const int n = mbmi->palette_mode_info.palette_size[plane != 0];
   int i, j;
   uint8_t *color_map = xd->plane[plane != 0].color_index_map;
   const aom_prob(*const prob)[PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] =
@@ -355,10 +355,10 @@
 
   for (i = 0; i < rows; ++i) {
     for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
-      color_ctx =
-          av1_get_palette_color_context(color_map, cols, i, j, n, color_order);
-      color_idx = aom_read_tree(r, av1_palette_color_tree[n - 2],
-                                prob[n - 2][color_ctx], ACCT_STR);
+      const int color_ctx = av1_get_palette_color_context(color_map, cols, i, j,
+                                                          n, color_order, NULL);
+      const int color_idx = aom_read_tree(r, av1_palette_color_tree[n - 2],
+                                          prob[n - 2][color_ctx], ACCT_STR);
       assert(color_idx >= 0 && color_idx < n);
       color_map[i * cols + j] = color_order[color_idx];
     }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 99da9ee..fd71a1e 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1768,8 +1768,7 @@
   if (colors > 1 && colors <= 64) {
     int r, c, i, j, k;
     const int max_itr = 50;
-    int color_ctx, color_idx = 0;
-    int color_order[PALETTE_MAX_SIZE];
+    uint8_t color_order[PALETTE_MAX_SIZE];
     float *const data = x->palette_buffer->kmeans_data_buf;
     float centroids[PALETTE_MAX_SIZE];
     uint8_t *const color_map = xd->plane[0].color_index_map;
@@ -1853,13 +1852,9 @@
               1);
       for (i = 0; i < rows; ++i) {
         for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
-          color_ctx = av1_get_palette_color_context(color_map, cols, i, j, k,
-                                                    color_order);
-          for (r = 0; r < k; ++r)
-            if (color_map[i * cols + j] == color_order[r]) {
-              color_idx = r;
-              break;
-            }
+          int color_idx;
+          const int color_ctx = av1_get_palette_color_context(
+              color_map, cols, i, j, k, color_order, &color_idx);
           assert(color_idx >= 0 && color_idx < k);
           this_rate += cpi->palette_y_color_cost[k - 2][color_ctx][color_idx];
         }
@@ -3647,8 +3642,7 @@
   if (colors > 1 && colors <= 64) {
     int r, c, n, i, j;
     const int max_itr = 50;
-    int color_ctx, color_idx = 0;
-    int color_order[PALETTE_MAX_SIZE];
+    uint8_t color_order[PALETTE_MAX_SIZE];
     int64_t this_sse;
     float lb_u, ub_u, val_u;
     float lb_v, ub_v, val_v;
@@ -3741,13 +3735,9 @@
 
       for (i = 0; i < rows; ++i) {
         for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
-          color_ctx = av1_get_palette_color_context(color_map, cols, i, j, n,
-                                                    color_order);
-          for (r = 0; r < n; ++r)
-            if (color_map[i * cols + j] == color_order[r]) {
-              color_idx = r;
-              break;
-            }
+          int color_idx;
+          const int color_ctx = av1_get_palette_color_context(
+              color_map, cols, i, j, n, color_order, &color_idx);
           assert(color_idx >= 0 && color_idx < n);
           this_rate += cpi->palette_uv_color_cost[n - 2][color_ctx][color_idx];
         }
@@ -9377,7 +9367,7 @@
     int best_rate_nocoef;
 #endif
     int64_t distortion2 = 0, distortion_y = 0, dummy_rd = best_rd, this_rd;
-    int skippable = 0;
+    int skippable = 0, rate_overhead = 0;
     TX_SIZE best_tx_size, uv_tx;
     TX_TYPE best_tx_type;
     PALETTE_MODE_INFO palette_mode_info;
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 561beed..86ab57f 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -409,18 +409,19 @@
 }
 
 #if CONFIG_PALETTE
-void av1_tokenize_palette_sb(const AV1_COMP *cpi, struct ThreadData *const td,
-                             int plane, TOKENEXTRA **t, RUN_TYPE dry_run,
-                             BLOCK_SIZE bsize, int *rate) {
-  MACROBLOCK *const x = &td->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  uint8_t *color_map = xd->plane[plane != 0].color_index_map;
-  PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
-  int n = pmi->palette_size[plane != 0];
-  int i, j, k;
+void av1_tokenize_palette_sb(const AV1_COMP *cpi,
+                             const struct ThreadData *const td, int plane,
+                             TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
+                             int *rate) {
+  const MACROBLOCK *const x = &td->mb;
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const uint8_t *const color_map = xd->plane[plane != 0].color_index_map;
+  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+  const int n = pmi->palette_size[plane != 0];
+  int i, j;
   int this_rate = 0;
-  int color_idx = -1, color_ctx, color_order[PALETTE_MAX_SIZE];
+  uint8_t color_order[PALETTE_MAX_SIZE];
   const int rows = (4 * num_4x4_blocks_high_lookup[bsize]) >>
                    (xd->plane[plane != 0].subsampling_y);
   const int cols = (4 * num_4x4_blocks_wide_lookup[bsize]) >>
@@ -431,17 +432,13 @@
 
   for (i = 0; i < rows; ++i) {
     for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
-      color_ctx =
-          av1_get_palette_color_context(color_map, cols, i, j, n, color_order);
-      for (k = 0; k < n; ++k)
-        if (color_map[i * cols + j] == color_order[k]) {
-          color_idx = k;
-          break;
-        }
-      assert(color_idx >= 0 && color_idx < n);
+      int color_new_idx;
+      const int color_ctx = av1_get_palette_color_context(
+          color_map, cols, i, j, n, color_order, &color_new_idx);
+      assert(color_new_idx >= 0 && color_new_idx < n);
       if (dry_run == DRY_RUN_COSTCOEFFS)
-        this_rate += cpi->palette_y_color_cost[n - 2][color_ctx][color_idx];
-      (*t)->token = color_idx;
+        this_rate += cpi->palette_y_color_cost[n - 2][color_ctx][color_new_idx];
+      (*t)->token = color_new_idx;
       (*t)->context_tree = probs[n - 2][color_ctx];
       (*t)->skip_eob_node = 0;
       ++(*t);
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
index ae896a6..89610df 100644
--- a/av1/encoder/tokenize.h
+++ b/av1/encoder/tokenize.h
@@ -72,7 +72,7 @@
 #endif
 #if CONFIG_PALETTE
 void av1_tokenize_palette_sb(const struct AV1_COMP *cpi,
-                             struct ThreadData *const td, int plane,
+                             const struct ThreadData *const td, int plane,
                              TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
                              int *rate);
 #endif  // CONFIG_PALETTE