vp9_ethread: calculate and save the tok starting address for tiles

Each tile's tok starting address is calculated before the encoding
process begins. These addresses are stored so that the same
calculation does not have to be repeated when packing the bitstream.

Change-Id: I0a3be0301f002260c19a850303f2f73ebc47aa50
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index ebc633e..cad3109 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -934,29 +934,21 @@
   VP9_COMMON *const cm = &cpi->common;
   vp9_writer residual_bc;
   int tile_row, tile_col;
-  TOKENEXTRA *tok[4][1 << 6], *tok_end;
+  TOKENEXTRA *tok_end;
   size_t total_size = 0;
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
-  TOKENEXTRA *pre_tok = cpi->tok;
-  int tile_tok = 0;
 
   vpx_memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) *
              mi_cols_aligned_to_sb(cm->mi_cols));
 
-  for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
-    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
-      int tile_idx = tile_row * tile_cols + tile_col;
-      tok[tile_row][tile_col] = pre_tok + tile_tok;
-      pre_tok = tok[tile_row][tile_col];
-      tile_tok = allocated_tokens(cpi->tile_data[tile_idx].tile_info);
-    }
-  }
-
   for (tile_row = 0; tile_row < tile_rows; tile_row++) {
     for (tile_col = 0; tile_col < tile_cols; tile_col++) {
       int tile_idx = tile_row * tile_cols + tile_col;
-      tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
+      TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
+
+      tok_end = cpi->tile_tok[tile_row][tile_col] +
+          cpi->tok_count[tile_row][tile_col];
 
       if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
         vp9_start_encode(&residual_bc, data_ptr + total_size + 4);
@@ -964,8 +956,8 @@
         vp9_start_encode(&residual_bc, data_ptr + total_size);
 
       write_modes(cpi, &cpi->tile_data[tile_idx].tile_info,
-                  &residual_bc, &tok[tile_row][tile_col], tok_end);
-      assert(tok[tile_row][tile_col] == tok_end);
+                  &residual_bc, &tok, tok_end);
+      assert(tok == tok_end);
       vp9_stop_encode(&residual_bc);
       if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
         // size of this tile
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 8ba0656..a5adcbb 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3422,32 +3422,29 @@
          cm->show_frame;
 }
 
-static void tile_data_init(TileDataEnc *tile_data) {
-  int i, j;
-  for (i = 0; i < BLOCK_SIZES; ++i) {
-    for (j = 0; j < MAX_MODES; ++j) {
-      tile_data->thresh_freq_fact[i][j] = 32;
-      tile_data->mode_map[i][j] = j;
-    }
-  }
-}
-
-static void encode_tiles(VP9_COMP *cpi) {
+static void init_tile_data(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   const int tile_cols = 1 << cm->log2_tile_cols;
   const int tile_rows = 1 << cm->log2_tile_rows;
-
   int tile_col, tile_row;
-  TOKENEXTRA *tok[4][1 << 6];
-  TOKENEXTRA *pre_tok = cpi->tok;
+  TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
   int tile_tok = 0;
 
   if (cpi->tile_data == NULL) {
     CHECK_MEM_ERROR(cm, cpi->tile_data,
         vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
     for (tile_row = 0; tile_row < tile_rows; ++tile_row)
-      for (tile_col = 0; tile_col < tile_cols; ++tile_col)
-        tile_data_init(&cpi->tile_data[tile_row * tile_cols + tile_col]);
+      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+        TileDataEnc *tile_data =
+            &cpi->tile_data[tile_row * tile_cols + tile_col];
+        int i, j;
+        for (i = 0; i < BLOCK_SIZES; ++i) {
+          for (j = 0; j < MAX_MODES; ++j) {
+            tile_data->thresh_freq_fact[i][j] = 32;
+            tile_data->mode_map[i][j] = j;
+          }
+        }
+      }
   }
 
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
@@ -3456,32 +3453,41 @@
           &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
       vp9_tile_init(tile_info, cm, tile_row, tile_col);
 
-      tok[tile_row][tile_col] = pre_tok + tile_tok;
-      pre_tok = tok[tile_row][tile_col];
+      cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
+      pre_tok = cpi->tile_tok[tile_row][tile_col];
       tile_tok = allocated_tokens(*tile_info);
     }
   }
+}
+
+static void encode_tiles(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int tile_cols = 1 << cm->log2_tile_cols;
+  const int tile_rows = 1 << cm->log2_tile_rows;
+  int tile_col, tile_row;
+
+  init_tile_data(cpi);
 
   for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
     for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
       const TileInfo * const tile_info =
           &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
-      TOKENEXTRA * const old_tok = tok[tile_row][tile_col];
+      TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
       int mi_row;
-      TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+      TileDataEnc *this_tile =
+          &cpi->tile_data[tile_row * tile_cols + tile_col];
 
       for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
            mi_row += MI_BLOCK_SIZE) {
         if (cpi->sf.use_nonrd_pick_mode)
-          encode_nonrd_sb_row(cpi, &cpi->td, this_tile, mi_row,
-                              &tok[tile_row][tile_col]);
+          encode_nonrd_sb_row(cpi, &cpi->td, this_tile, mi_row, &tok);
         else
-          encode_rd_sb_row(cpi, &cpi->td, this_tile, mi_row,
-                           &tok[tile_row][tile_col]);
+          encode_rd_sb_row(cpi, &cpi->td, this_tile, mi_row, &tok);
       }
       cpi->tok_count[tile_row][tile_col] =
-          (unsigned int)(tok[tile_row][tile_col] - old_tok);
-      assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*tile_info));
+          (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
+      assert(tok - cpi->tile_tok[tile_row][tile_col] <=
+          allocated_tokens(*tile_info));
     }
   }
 }
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 7a08be1..28350ef 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -247,8 +247,8 @@
   vp9_free_frame_buffer(&cpi->alt_ref_buffer);
   vp9_lookahead_destroy(cpi->lookahead);
 
-  vpx_free(cpi->tok);
-  cpi->tok = 0;
+  vpx_free(cpi->tile_tok[0][0]);
+  cpi->tile_tok[0][0] = 0;
 
   vp9_free_pc_tree(&cpi->td);
 
@@ -543,11 +543,12 @@
 
   vp9_alloc_context_buffers(cm, cm->width, cm->height);
 
-  vpx_free(cpi->tok);
+  vpx_free(cpi->tile_tok[0][0]);
 
   {
     unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
-    CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
+    CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
+        vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
   }
 
   vp9_setup_pc_tree(&cpi->common, &cpi->td);
@@ -1800,7 +1801,6 @@
 #endif
 
   dealloc_compressor_data(cpi);
-  vpx_free(cpi->tok);
 
   for (i = 0; i < sizeof(cpi->mbgraph_stats) /
                   sizeof(cpi->mbgraph_stats[0]); ++i) {
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 81ad6b1..b75f491 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -297,7 +297,7 @@
 
   YV12_BUFFER_CONFIG last_frame_uf;
 
-  TOKENEXTRA *tok;
+  TOKENEXTRA *tile_tok[4][1 << 6];
   unsigned int tok_count[4][1 << 6];
 
   // Ambient reconstruction err target for force key frames