Enable optimize_b for intra blocks

Coding gain:
lowres  0.05%
midres  0.10%
hdres   0.18%

Change-Id: I508b150c02588f911a8ddddfe73c770f0819fe10
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 45505f1..3cc93fe 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -4834,7 +4834,7 @@
     int plane;
     mbmi->skip = 1;
     for (plane = 0; plane < MAX_MB_PLANE; ++plane)
-      vp10_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane);
+      vp10_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
     if (output_enabled)
       sum_intra_stats(td->counts, mi, xd->above_mi, xd->left_mi,
                       frame_is_intra_only(cm));
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 9acf00c..b636e61 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -773,6 +773,20 @@
                      VP10_XFORM_QUANT_SKIP_QUANT);
 #endif  // CONFIG_EXT_INTRA
 
+  if (args->ctx != NULL) {
+    // Update this transform block's above/left entropy contexts: taken from
+    // optimize_b's result when optimization is enabled, else from the eob.
+    struct optimize_ctx *const ctx = args->ctx;
+    ENTROPY_CONTEXT *const a = &ctx->ta[plane][blk_col];
+    ENTROPY_CONTEXT *const l = &ctx->tl[plane][blk_row];
+    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
+      const int coeff_ctx = combine_entropy_contexts(*a, *l);
+      *a = *l = optimize_b(x, plane, block, tx_size, coeff_ctx) > 0;
+    } else {
+      *a = *l = p->eobs[block] > 0;
+    }
+  }
+
   if (*eob) {
     // inverse transform
     inv_txfm_param.tx_type = tx_type;
@@ -794,10 +808,22 @@
   }
 }
 
-void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
+void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
+                                   int enable_optimize_b) {
   const MACROBLOCKD *const xd = &x->e_mbd;
-  struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip};
+  struct optimize_ctx ctx;
+  struct encode_b_args arg = {x, &ctx, &xd->mi[0]->mbmi.skip};
 
+  if (enable_optimize_b && x->optimize &&
+      (!x->skip_recode || !x->skip_optimize)) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    const TX_SIZE tx_size = plane ? get_uv_tx_size(&xd->mi[0]->mbmi, pd) :
+        xd->mi[0]->mbmi.tx_size;
+    vp10_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
+  } else {
+    // A NULL ctx tells the per-block encoder to skip the optimize_b step.
+    arg.ctx = NULL;
+  }
   vp10_foreach_transformed_block_in_plane(xd, bsize, plane,
                                           vp10_encode_block_intra, &arg);
 }
diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h
index e208c88..cbe15aa 100644
--- a/vp10/encoder/encodemb.h
+++ b/vp10/encoder/encodemb.h
@@ -48,7 +48,8 @@
                              BLOCK_SIZE plane_bsize,
                              TX_SIZE tx_size, void *arg);
 
-void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
+void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
+                                   int enable_optimize_b);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp10/encoder/firstpass.c b/vp10/encoder/firstpass.c
index 5936a24..61afe9d 100644
--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -619,7 +619,7 @@
       xd->mi[0]->mbmi.mode = DC_PRED;
       xd->mi[0]->mbmi.tx_size = use_dc_pred ?
          (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
-      vp10_encode_intra_block_plane(x, bsize, 0);
+      vp10_encode_intra_block_plane(x, bsize, 0, 0);
       this_error = vpx_get_mb_ss(x->plane[0].src_diff);
 
       // Keep a record of blocks that have almost no intra error residual
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index e7762a5..516a929 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -8243,7 +8243,7 @@
       if (this_mode != DC_PRED && this_mode != TM_PRED)
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
-      vp10_encode_intra_block_plane(x, bsize, 0);
+      vp10_encode_intra_block_plane(x, bsize, 0, 0);
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         x->recon_variance =