Calculate coefficient token costs before encoding each superblock

Compression gain:
AWCY:
  PSNR    PSNR HVS    SSIM    CIEDE 2000
  -0.42   -0.15       -0.38   -0.62

Google test sets:
lowres -0.18%
midres -0.32%

No obvious encoding speed changes observed.

Change-Id: I888cf4b22b591d79d80b2d77c9eb10d84e5de722
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index b6a2637..3f0cd23 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4531,6 +4531,11 @@
     MODE_INFO **mi = cm->mi_grid_visible + idx_str;
     PC_TREE *const pc_root = td->pc_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2];
 
+    av1_fill_token_costs_from_cdf(x->token_head_costs,
+                                  x->e_mbd.tile_ctx->coef_head_cdfs);
+    av1_fill_token_costs_from_cdf(x->token_tail_costs,
+                                  x->e_mbd.tile_ctx->coef_tail_cdfs);
+
     if (sf->adaptive_pred_interp_filter) {
       for (i = 0; i < leaf_nodes; ++i)
         td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 81c4d88..fdb0bfe 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -421,8 +421,8 @@
   x->nmvjointcost = x->nmv_vec_cost[nmv_ctx];
 }
 
-void fill_token_costs_from_cdf(av1_coeff_cost *cost,
-                               coeff_cdf_model (*cdf)[PLANE_TYPES]) {
+void av1_fill_token_costs_from_cdf(av1_coeff_cost *cost,
+                                   coeff_cdf_model (*cdf)[PLANE_TYPES]) {
   for (int tx = 0; tx < TX_SIZES; ++tx) {
     for (int pt = 0; pt < PLANE_TYPES; ++pt) {
       for (int rt = 0; rt < REF_TYPES; ++rt) {
@@ -471,15 +471,13 @@
   }
 #endif
 
-  if (cpi->oxcf.pass != 1) {
-    fill_token_costs_from_cdf(x->token_head_costs, cm->fc->coef_head_cdfs);
-    fill_token_costs_from_cdf(x->token_tail_costs, cm->fc->coef_tail_cdfs);
 #if CONFIG_GLOBAL_MOTION
+  if (cpi->oxcf.pass != 1) {
     for (int i = 0; i < TRANS_TYPES; ++i)
       cpi->gmtype_cost[i] = (1 + (i > 0 ? GLOBAL_TYPE_BITS : 0))
                             << AV1_PROB_COST_SHIFT;
-#endif  // CONFIG_GLOBAL_MOTION
   }
+#endif  // CONFIG_GLOBAL_MOTION
 }
 
 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 8d22355..3c9646f 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -712,6 +712,9 @@
 void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
                          FRAME_CONTEXT *fc);
 
+void av1_fill_token_costs_from_cdf(av1_coeff_cost *cost,
+                                   coeff_cdf_model (*cdf)[PLANE_TYPES]);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index a632307..17bf1cc 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -315,6 +315,17 @@
   (*t)->eob_val = eob_val;
   (*t)->first_val = first_val;
   (*t)++;
+
+  if (token == BLOCK_Z_TOKEN) {
+    update_cdf(*head_cdf, 0, HEAD_TOKENS + 1);
+  } else {
+    if (eob_val != LAST_EOB) {
+      const int symb = 2 * AOMMIN(token, TWO_TOKEN) - eob_val + first_val;
+      update_cdf(*head_cdf, symb, HEAD_TOKENS + first_val);
+    }
+    if (token > ONE_TOKEN)
+      update_cdf(*tail_cdf, token - TWO_TOKEN, TAIL_TOKENS);
+  }
 }
 #endif  // !CONFIG_PVQ || CONFIG_VAR_TX