Move palette code from libvpx/nextgenv2 to aomedia.

Palette code originally by huisu@. It gives an 11.68% BD-rate improvement on
the screen content set.

The last 7 small speedup patches are from me; together they give a 10%
overall speedup on the screen content set.

From aomedia/master:
Revert 57fa626 : Remove color_index_map

From libvpx/nextgenv2:
c93e5cc : Bring palette back to nextgenv2
827e1b3 : Add test for screen content coding tools in end to end test
78b0bd0 : Complete (mostly) migration of palette mode
8a128c2 : Fixes for Palette mode
4ab0091 : Palette mode: record selected transform type
cbb8be7 : Set block size upper bound for Palette mode
bb0e692 : Convert palette from double to float.
a0a23b7 : faster code: replace nested for loops by memcpy().
016a5da : Palette code: simpler and faster duplicate removal
d000020 : Palette code cleanup
6dde801 : Use built-in qsort(); create remove_dup() method.
f746c10 : Handle centroid rounding inside palette.c itself.
d68c7b6 : Palette: count Y colors only for screen content.
f1906e9 : Palette code: remove the use of same if condition twice.

Locally made sure all combinations of "--enable-ext-intra" and/or
"--enable-palette" and/or "--enable-aom-highbitdepth" build OK.

P.S. Note for future reference:
The EXT_INTRA experiment has only been moved *partially* to aomedia at this
point. If and when the rest is moved to aomedia, the EXT_INTRA + PALETTE
combination needs to be moved to aomedia as well.

Change-Id: I8826780014ec8a88281e56c8258b7c966121b46f
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index c81c1e5..abf6199 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -276,6 +276,37 @@
   return c;
 }
 
+#if CONFIG_PALETTE
+void av1_decode_palette_tokens(MACROBLOCKD *const xd, int plane,  // Reads tree-coded color indices from r and
+                               aom_reader *r) {  // stores them in the plane's color_index_map.
+  MODE_INFO *const mi = xd->mi[0];
+  MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  const int rows = (4 * num_4x4_blocks_high_lookup[bsize]) >>  // 4 x (count of 4x4 units high),
+                   (xd->plane[plane != 0].subsampling_y);  // shifted down by vertical subsampling.
+  const int cols = (4 * num_4x4_blocks_wide_lookup[bsize]) >>  // 4 x (count of 4x4 units wide),
+                   (xd->plane[plane != 0].subsampling_x);  // shifted down by horizontal subsampling.
+  int color_idx, color_ctx, color_order[PALETTE_MAX_SIZE];
+  int n = mbmi->palette_mode_info.palette_size[plane != 0];  // Any nonzero plane maps to index 1.
+  int i, j;
+  uint8_t *color_map = xd->plane[plane != 0].color_index_map;  // Row-major map with stride cols.
+  const aom_prob (*const prob)[PALETTE_COLOR_CONTEXTS][PALETTE_COLORS - 1] =
+      plane ? av1_default_palette_uv_color_prob  // Nonzero plane: UV default probabilities;
+            : av1_default_palette_y_color_prob;  // plane 0: Y default probabilities.
+  // Position (0, 0) is never read in the loop below; presumably set by the caller -- TODO confirm.
+  for (i = 0; i < rows; ++i) {
+    for (j = (i == 0 ? 1 : 0); j < cols; ++j) {  // Row 0 starts at column 1.
+      color_ctx =  // NOTE(review): helper presumably also fills color_order -- confirm.
+          av1_get_palette_color_context(color_map, cols, i, j, n, color_order);
+      color_idx = aom_read_tree(r, av1_palette_color_tree[n - 2],  // Tables are indexed by n - 2,
+                                prob[n - 2][color_ctx]);  // so n is assumed to be >= 2 here.
+      assert(color_idx >= 0 && color_idx < n);  // Debug-only check; compiled out under NDEBUG.
+      color_map[i * cols + j] = color_order[color_idx];  // Store color_order[color_idx], not the raw symbol.
+    }
+  }
+}
+#endif  // CONFIG_PALETTE
+
 int av1_decode_block_tokens(MACROBLOCKD *xd, int plane, const SCAN_ORDER *sc,
                             int x, int y, TX_SIZE tx_size, aom_reader *r,
                             int seg_id) {