Refactor transform selection process

This commit re-arranges the transform type and size selection
process. It removes an unnecessary rate-distortion cost computation
step. Local experiments show that this speeds up the encoding
process by 6% for both the baseline and the ext-intra experiment.

Change-Id: Iab3b86a63a1e9e55548466791ed5d29a0575c1e7
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 1fd0391..e7762a5 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1681,12 +1681,6 @@
   if (mbmi->tx_size >= TX_32X32)
     assert(mbmi->tx_type == DCT_DCT);
 #endif
-
-  txfm_rd_in_plane(x,
-                   cpi,
-                   &r, &d, &s,
-                   &sse, ref_best_rd, 0, bs, best_tx,
-                   cpi->sf.use_fast_coef_costing);
 }
 
 static void super_block_yrd(VP10_COMP *cpi, MACROBLOCK *x, int *rate,
@@ -8249,6 +8243,7 @@
       if (this_mode != DC_PRED && this_mode != TM_PRED)
         rate2 += intra_cost_penalty;
       distortion2 = distortion_y + distortion_uv;
+      vp10_encode_intra_block_plane(x, bsize, 0);
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         x->recon_variance =