Palette: remove palette_first_color_idx[] in PALETTE_MODE_INFO

Handle the first color index in the tokenization process, along with the
other color indices.

This patch also includes some minor refactoring changes.

Test results verify that there is no impact on compression efficiency.

Change-Id: I7de51c18a52f337320331b5e8d63dfea3cf510f0
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index bcb95d8..8704469 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -643,10 +643,11 @@
 #if CONFIG_PALETTE
 static void pack_palette_tokens(aom_writer *w, const TOKENEXTRA **tp, int n,
                                 int num) {
-  int i;
   const TOKENEXTRA *p = *tp;
-
-  for (i = 0; i < num; ++i) {
+  write_uniform(w, n, p->token);  // The first color index.
+  ++p;
+  --num;
+  for (int i = 0; i < num; ++i) {
 #if CONFIG_NEW_MULTISYMBOL
     aom_write_symbol(w, p->token, p->palette_cdf, n);
 #else
@@ -656,7 +657,6 @@
 #endif
     ++p;
   }
-
   *tp = p;
 }
 #endif  // CONFIG_PALETTE
@@ -1493,12 +1493,14 @@
   if (mbmi->mode == DC_PRED) {
     const int n = pmi->palette_size[0];
     int palette_y_mode_ctx = 0;
-    if (above_mi)
+    if (above_mi) {
       palette_y_mode_ctx +=
           (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
-    if (left_mi)
+    }
+    if (left_mi) {
       palette_y_mode_ctx +=
           (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
+    }
     aom_write(
         w, n > 0,
         av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_y_mode_ctx]);
@@ -1520,7 +1522,6 @@
         aom_write_literal(w, pmi->palette_colors[i], cm->bit_depth);
       }
 #endif  // CONFIG_PALETTE_DELTA_ENCODING
-      write_uniform(w, n, pmi->palette_first_color_idx[0]);
     }
   }
 
@@ -1552,7 +1553,6 @@
                           cm->bit_depth);
       }
 #endif  // CONFIG_PALETTE_DELTA_ENCODING
-      write_uniform(w, n, pmi->palette_first_color_idx[1]);
     }
   }
 }
@@ -2503,7 +2503,7 @@
       av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows,
                                &cols);
       assert(*tok < tok_end);
-      pack_palette_tokens(w, tok, palette_size_plane, rows * cols - 1);
+      pack_palette_tokens(w, tok, palette_size_plane, rows * cols);
       assert(*tok < tok_end + mbmi->skip);
     }
   }
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index dbce6a8..4148009 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -5968,13 +5968,8 @@
 #if CONFIG_PALETTE
     if (bsize >= BLOCK_8X8 && !dry_run) {
       for (plane = 0; plane <= 1; ++plane) {
-        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
-          mbmi->palette_mode_info.palette_first_color_idx[plane] =
-              xd->plane[plane].color_index_map[0];
-          // TODO(huisu): this increases the use of token buffer. Needs stretch
-          // test to verify.
+        if (mbmi->palette_mode_info.palette_size[plane] > 0)
           av1_tokenize_palette_sb(cpi, td, plane, t, dry_run, bsize, rate);
-        }
       }
     }
 #endif  // CONFIG_PALETTE
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index d188d4e..bf29608 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -323,15 +323,12 @@
                              const struct ThreadData *const td, int plane,
                              TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize,
                              int *rate) {
+  assert(plane == 0 || plane == 1);
   const MACROBLOCK *const x = &td->mb;
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const uint8_t *const color_map = xd->plane[plane].color_index_map;
   const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
-  const int n = pmi->palette_size[plane];
-  int i, j;
-  int this_rate = 0;
-  uint8_t color_order[PALETTE_MAX_SIZE];
 #if CONFIG_NEW_MULTISYMBOL
   aom_cdf_prob(
       *palette_cdf)[PALETTE_COLOR_INDEX_CONTEXTS][CDF_SIZE(PALETTE_COLORS)] =
@@ -347,24 +344,37 @@
   int plane_block_width, rows, cols;
   av1_get_block_dimensions(bsize, plane, xd, &plane_block_width, NULL, &rows,
                            &cols);
-  assert(plane == 0 || plane == 1);
 
-#if CONFIG_PALETTE_THROUGHPUT
-  int k;
-  for (k = 1; k < rows + cols - 1; ++k) {
-    for (j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) {
-      i = k - j;
+  // The first color index does not use context or entropy.
+  (*t)->token = color_map[0];
+#if CONFIG_NEW_MULTISYMBOL
+  (*t)->palette_cdf = NULL;
 #else
-  for (i = 0; i < rows; ++i) {
-    for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
+  (*t)->context_tree = NULL;
+#endif
+  (*t)->skip_eob_node = 0;
+  ++(*t);
+
+  const int n = pmi->palette_size[plane];
+  const int calc_rate = rate && dry_run == DRY_RUN_COSTCOEFFS;
+  int this_rate = 0;
+  uint8_t color_order[PALETTE_MAX_SIZE];
+#if CONFIG_PALETTE_THROUGHPUT
+  for (int k = 1; k < rows + cols - 1; ++k) {
+    for (int j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) {
+      int i = k - j;
+#else
+  for (int i = 0; i < rows; ++i) {
+    for (int j = (i == 0 ? 1 : 0); j < cols; ++j) {
 #endif  // CONFIG_PALETTE_THROUGHPUT
       int color_new_idx;
       const int color_ctx = av1_get_palette_color_index_context(
           color_map, plane_block_width, i, j, n, color_order, &color_new_idx);
       assert(color_new_idx >= 0 && color_new_idx < n);
-      if (dry_run == DRY_RUN_COSTCOEFFS)
+      if (calc_rate) {
         this_rate += cpi->palette_y_color_cost[n - PALETTE_MIN_SIZE][color_ctx]
                                               [color_new_idx];
+      }
       (*t)->token = color_new_idx;
 #if CONFIG_NEW_MULTISYMBOL
       (*t)->palette_cdf = palette_cdf[n - PALETTE_MIN_SIZE][color_ctx];