Move color_index_map buffer to CB_BUFFER structure

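To facilitate row-based multi-threading of the decoding stage, move
the color_index_map buffer from ThreadData to the CB_BUFFER structure.
Each palette block's map is now addressed through a running per-plane
offset (color_index_map_offset) into that buffer.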

Change-Id: If0187bb5351d88901359cf101d9bfdcc351fb2a0
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index e5b8365..3e8d1d6 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -395,6 +395,7 @@
   DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]);
   eob_info eob_data[MAX_MB_PLANE]
                    [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+  DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
 } CB_BUFFER;
 
 typedef struct macroblockd_plane {
@@ -597,6 +598,7 @@
 
   uint16_t cb_offset[MAX_MB_PLANE];
   uint16_t txb_offset[MAX_MB_PLANE];
+  uint16_t color_index_map_offset[2];
 } MACROBLOCKD;
 
 static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 276f79a..cf81ca6 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -1507,7 +1507,8 @@
 
   if (use_palette) {
     int r, c;
-    const uint8_t *const map = xd->plane[plane != 0].color_index_map;
+    const uint8_t *const map = xd->plane[plane != 0].color_index_map +
+                               xd->color_index_map_offset[plane != 0];
     const uint16_t *const palette =
         mbmi->palette_mode_info.palette_colors + plane * PALETTE_MAX_SIZE;
     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 6e984c4..2cf12c5 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1054,6 +1054,16 @@
     dec_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
 }
 
+static void set_color_index_map_offset(MACROBLOCKD *const xd, int plane,
+                                       aom_reader *r) {
+  (void)r;
+  Av1ColorMapParam params;
+  const MB_MODE_INFO *const mbmi = xd->mi[0];
+  av1_get_block_dimensions(mbmi->sb_type, plane, xd, &params.plane_width,
+                           &params.plane_height, NULL, NULL);
+  xd->color_index_map_offset[plane] += params.plane_width * params.plane_height;
+}
+
 static void decode_token_and_recon_block(AV1Decoder *const pbi,
                                          MACROBLOCKD *const xd, int mi_row,
                                          int mi_col, aom_reader *r,
@@ -1216,6 +1226,9 @@
     cfl_store_inter_block(cm, xd);
   }
 
+  av1_visit_palette(pbi, xd, mi_row, mi_col, r, bsize,
+                    set_color_index_map_offset);
+
   int reader_corrupted_flag = aom_reader_has_error(r);
   aom_merge_corrupted_flag(&xd->corrupted, reader_corrupted_flag);
 }
@@ -1330,19 +1343,8 @@
                          PARTITION_TYPE partition, BLOCK_SIZE bsize) {
   decode_mbmi_block(pbi, xd, mi_row, mi_col, r, partition, bsize);
 
-  if (!is_inter_block(xd->mi[0])) {
-    for (int plane = 0; plane < AOMMIN(2, av1_num_planes(&pbi->common));
-         ++plane) {
-      const struct macroblockd_plane *const pd = &xd->plane[plane];
-      if (is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
-                              pd->subsampling_y)) {
-        if (xd->mi[0]->palette_mode_info.palette_size[plane])
-          av1_decode_palette_tokens(xd, plane, r);
-      } else {
-        assert(xd->mi[0]->palette_mode_info.palette_size[plane] == 0);
-      }
-    }
-  }
+  av1_visit_palette(pbi, xd, mi_row, mi_col, r, bsize,
+                    av1_decode_palette_tokens);
 
   AV1_COMMON *cm = &pbi->common;
   MB_MODE_INFO *mbmi = xd->mi[0];
@@ -2506,6 +2508,10 @@
     xd->cb_offset[plane] = 0;
     xd->txb_offset[plane] = 0;
   }
+  xd->plane[0].color_index_map = cb_buffer->color_index_map[0];
+  xd->plane[1].color_index_map = cb_buffer->color_index_map[1];
+  xd->color_index_map_offset[0] = 0;
+  xd->color_index_map_offset[1] = 0;
 }
 
 static void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td,
@@ -2683,8 +2689,6 @@
       // Initialise the tile context from the frame context
       tile_data->tctx = *cm->fc;
       td->xd.tile_ctx = &tile_data->tctx;
-      td->xd.plane[0].color_index_map = td->color_index_map[0];
-      td->xd.plane[1].color_index_map = td->color_index_map[1];
 
       // decode tile
       decode_tile(pbi, &pbi->td, row, col);
@@ -2775,8 +2779,6 @@
       // Initialise the tile context from the frame context
       tile_data->tctx = *cm->fc;
       td->xd.tile_ctx = &tile_data->tctx;
-      td->xd.plane[0].color_index_map = td->color_index_map[0];
-      td->xd.plane[1].color_index_map = td->color_index_map[1];
 #if CONFIG_ACCOUNTING
       if (pbi->acct_enabled) {
         tile_data->bit_reader.accounting->last_tell_frac =
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index 94b9a2e..d5cc43d 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -189,6 +189,24 @@
   aom_free(pbi);
 }
 
+void av1_visit_palette(AV1Decoder *const pbi, MACROBLOCKD *const xd, int mi_row,
+                       int mi_col, aom_reader *r, BLOCK_SIZE bsize,
+                       palette_visitor_fn_t visit) {
+  if (!is_inter_block(xd->mi[0])) {
+    for (int plane = 0; plane < AOMMIN(2, av1_num_planes(&pbi->common));
+         ++plane) {
+      const struct macroblockd_plane *const pd = &xd->plane[plane];
+      if (is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
+                              pd->subsampling_y)) {
+        if (xd->mi[0]->palette_mode_info.palette_size[plane])
+          visit(xd, plane, r);
+      } else {
+        assert(xd->mi[0]->palette_mode_info.palette_size[plane] == 0);
+      }
+    }
+  }
+}
+
 static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
                             const YV12_BUFFER_CONFIG *b) {
   return a->y_height == b->y_height && a->y_width == b->y_width &&
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index b866bf9..2443744 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -38,7 +38,6 @@
   DECLARE_ALIGNED(32, MACROBLOCKD, xd);
   /* dqcoeff are shared by all the planes. So planes must be decoded serially */
   DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MAX_TX_SQUARE]);
-  DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_PALETTE_SQUARE]);
   CB_BUFFER cb_buffer_base;
   uint8_t *mc_buf[2];
   int32_t mc_buf_size;
@@ -227,6 +226,13 @@
     return (v << 1) - m + aom_read_literal(r, 1, ACCT_STR);
 }
 
+typedef void (*palette_visitor_fn_t)(MACROBLOCKD *const xd, int plane,
+                                     aom_reader *r);
+
+void av1_visit_palette(AV1Decoder *const pbi, MACROBLOCKD *const xd, int mi_row,
+                       int mi_col, aom_reader *r, BLOCK_SIZE bsize,
+                       palette_visitor_fn_t visit);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 9552543..9d54bd1 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -66,7 +66,8 @@
                                aom_reader *r) {
   assert(plane == 0 || plane == 1);
   Av1ColorMapParam params;
-  params.color_map = xd->plane[plane].color_index_map;
+  params.color_map =
+      xd->plane[plane].color_index_map + xd->color_index_map_offset[plane];
   params.map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf
                          : xd->tile_ctx->palette_y_color_index_cdf;
   const MB_MODE_INFO *const mbmi = xd->mi[0];