Add rate computation to palette

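Currently the rate is never computed for the palette color indices.
The code that computes the rate lives inside av1_tokenize_palette_sb
and only runs when dry_run == DRY_RUN_COSTCOEFFS, but
av1_tokenize_palette_sb is only called when !dry_run, so that path is
never reached.

Split the cost computation out into av1_cost_palette_sb, call it from
encodeframe.c when dry_run == DRY_RUN_COSTCOEFFS, and reuse it in the
palette RD search in rdopt.c in place of the duplicated cost loops.
av1_tokenize_palette_sb now only emits tokens.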

Change-Id: Ie33eae9e4bcf1997a22dc939f31001334cb2c399
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e9aa9bb..36d7a75 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5968,10 +5968,14 @@
       sum_intra_stats(td->counts, xd, mi, xd->above_mi, xd->left_mi,
                       frame_is_intra_only(cm), mi_row, mi_col);
     }
-    if (bsize >= BLOCK_8X8 && !dry_run) {
+    if (bsize >= BLOCK_8X8) {
       for (plane = 0; plane <= 1; ++plane) {
-        if (mbmi->palette_mode_info.palette_size[plane] > 0)
-          av1_tokenize_palette_sb(td, plane, t, dry_run, bsize, rate);
+        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
+          if (!dry_run)
+            av1_tokenize_palette_sb(x, plane, t, bsize);
+          else if (rate && dry_run == DRY_RUN_COSTCOEFFS)  // rate is optional
+            *rate += av1_cost_palette_sb(x, plane, bsize);
+        }
       }
     }
 #if CONFIG_VAR_TX
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index acc7b10..8ec6fdf 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -3119,9 +3119,8 @@
 #endif  // CONFIG_FILTER_INTRA
 
   if (colors > 1 && colors <= 64) {
-    int r, c, i, j, k, palette_mode_cost;
+    int r, c, i, k, palette_mode_cost;
     const int max_itr = 50;
-    uint8_t color_order[PALETTE_MAX_SIZE];
     float *const data = x->palette_buffer->kmeans_data_buf;
     float centroids[PALETTE_MAX_SIZE];
     float lb, ub, val;
@@ -3230,16 +3229,7 @@
                                                     color_cache, n_cache,
 #endif  // CONFIG_PALETTE_DELTA_ENCODING
                                                     cpi->common.bit_depth);
-      for (i = 0; i < rows; ++i) {
-        for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
-          int color_idx;
-          const int color_ctx = av1_get_palette_color_index_context(
-              color_map, block_width, i, j, k, color_order, &color_idx);
-          assert(color_idx >= 0 && color_idx < k);
-          palette_mode_cost += x->palette_y_color_cost[k - PALETTE_MIN_SIZE]
-                                                      [color_ctx][color_idx];
-        }
-      }
+      palette_mode_cost += av1_cost_palette_sb(x, 0, bsize);
       this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
       if (*best_model_rd != INT64_MAX &&
           this_model_rd > *best_model_rd + (*best_model_rd >> 1))
@@ -5573,7 +5563,6 @@
   if (colors > 1 && colors <= 64) {
     int r, c, n, i, j;
     const int max_itr = 50;
-    uint8_t color_order[PALETTE_MAX_SIZE];
     float lb_u, ub_u, val_u;
     float lb_v, ub_v, val_v;
     float *const data = x->palette_buffer->kmeans_data_buf;
@@ -5678,17 +5667,7 @@
                                              color_cache, n_cache,
 #endif  // CONFIG_PALETTE_DELTA_ENCODING
                                              cpi->common.bit_depth);
-      for (i = 0; i < rows; ++i) {
-        for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
-          int color_idx;
-          const int color_ctx = av1_get_palette_color_index_context(
-              color_map, plane_block_width, i, j, n, color_order, &color_idx);
-          assert(color_idx >= 0 && color_idx < n);
-          this_rate += x->palette_uv_color_cost[n - PALETTE_MIN_SIZE][color_ctx]
-                                               [color_idx];
-        }
-      }
-
+      this_rate += av1_cost_palette_sb(x, 1, bsize);
       this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
       if (this_rd < *best_rd) {
         *best_rd = this_rd;
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index f7fe706..bc7d2a3 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -329,11 +329,10 @@
 }
 #endif  // !CONFIG_PVQ || CONFIG_VAR_TX
 
-void av1_tokenize_palette_sb(const struct ThreadData *const td, int plane,
-                             TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                             int *rate) {
+static int cost_and_tokenize_map_sb(const MACROBLOCK *const x, int plane,
+                                    TOKENEXTRA **t, int calc_rate,
+                                    BLOCK_SIZE bsize) {
   assert(plane == 0 || plane == 1);
-  const MACROBLOCK *const x = &td->mb;
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const uint8_t *const color_map = xd->plane[plane].color_index_map;
@@ -342,17 +341,14 @@
       *palette_cdf)[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] =
       plane ? xd->tile_ctx->palette_uv_color_index_cdf
             : xd->tile_ctx->palette_y_color_index_cdf;
+  const int(*color_cost)[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
+                        [PALETTE_COLORS] = plane ? &x->palette_uv_color_cost
+                                                 : &x->palette_y_color_cost;
   int plane_block_width, rows, cols;
+  const int n = pmi->palette_size[plane];
   av1_get_block_dimensions(bsize, plane, xd, &plane_block_width, NULL, &rows,
                            &cols);
 
-  // The first color index does not use context or entropy.
-  (*t)->token = color_map[0];
-  (*t)->palette_cdf = NULL;
-  ++(*t);
-
-  const int n = pmi->palette_size[plane];
-  const int calc_rate = rate && dry_run == DRY_RUN_COSTCOEFFS;
   int this_rate = 0;
   uint8_t color_order[PALETTE_MAX_SIZE];
 #if CONFIG_PALETTE_THROUGHPUT
@@ -368,15 +364,34 @@
           color_map, plane_block_width, i, j, n, color_order, &color_new_idx);
       assert(color_new_idx >= 0 && color_new_idx < n);
       if (calc_rate) {
-        this_rate += x->palette_y_color_cost[n - PALETTE_MIN_SIZE][color_ctx]
-                                            [color_new_idx];
+        this_rate +=
+            (*color_cost)[n - PALETTE_MIN_SIZE][color_ctx][color_new_idx];
+      } else {
+        (*t)->token = color_new_idx;
+        (*t)->palette_cdf = palette_cdf[n - PALETTE_MIN_SIZE][color_ctx];
+        ++(*t);
       }
-      (*t)->token = color_new_idx;
-      (*t)->palette_cdf = palette_cdf[n - PALETTE_MIN_SIZE][color_ctx];
-      ++(*t);
     }
   }
-  if (rate) *rate += this_rate;
+  if (calc_rate) return this_rate;
+  return 0;
+}
+
+int av1_cost_palette_sb(const MACROBLOCK *const x, int plane,
+                        BLOCK_SIZE bsize) {
+  return cost_and_tokenize_map_sb(x, plane, NULL, 1, bsize);
+}
+
+void av1_tokenize_palette_sb(const MACROBLOCK *const x, int plane,
+                             TOKENEXTRA **t, BLOCK_SIZE bsize) {
+  assert(plane == 0 || plane == 1);
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const uint8_t *const color_map = xd->plane[plane].color_index_map;
+  // The first color index does not use context or entropy.
+  (*t)->token = color_map[0];
+  (*t)->palette_cdf = NULL;
+  ++(*t);
+  cost_and_tokenize_map_sb(x, plane, t, 0, bsize);
 }
 
 #if CONFIG_PVQ
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
index 02c0be6..08b68f2 100644
--- a/av1/encoder/tokenize.h
+++ b/av1/encoder/tokenize.h
@@ -74,9 +74,12 @@
                            TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row,
                            int mi_col, BLOCK_SIZE bsize, int *rate);
 #endif
-void av1_tokenize_palette_sb(const struct ThreadData *const td, int plane,
-                             TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
-                             int *rate);
+
+int av1_cost_palette_sb(const MACROBLOCK *const x, int plane, BLOCK_SIZE bsize);
+
+void av1_tokenize_palette_sb(const MACROBLOCK *const x, int plane,
+                             TOKENEXTRA **t, BLOCK_SIZE bsize);
+
 void av1_tokenize_sb(const struct AV1_COMP *cpi, struct ThreadData *td,
                      TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
                      int *rate, const int mi_row, const int mi_col);