Replace full txfm rd search with luma writing

The CfL (chroma-from-luma) path needs the reconstructed luma pixels
during the chroma RD search. Replace the extra txfm_rd_in_plane() call
with av1_encode_intra_block_plane(), which directly writes out the
needed luma pixels during key frame rate-distortion optimization.

The compression stats remain identical. Key frame encoding is about
3% faster on average across the few 1080p clips tested.

Change-Id: I65760b657a6d4b45bed9c4f1eca0cff1a2c274a1
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index 97225b2..24a3741 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -692,7 +692,8 @@
   // Find best coding mode & reconstruct the MB so it is available
   // as a predictor for MBs that follow in the SB
   if (frame_is_intra_only(cm)) {
-    av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
+    av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx,
+                              best_rd);
   } else {
     if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
       av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index ae4ffa1..2c02094 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -8493,8 +8493,8 @@
 }
 #endif  // CONFIG_INTRABC
 
-void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
-                               RD_STATS *rd_cost, BLOCK_SIZE bsize,
+void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
+                               int mi_col, RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -8505,6 +8505,8 @@
   TX_SIZE max_uv_tx_size;
 
   (void)cm;
+  (void)mi_row;
+  (void)mi_col;
 
   ctx->skip = 0;
   mbmi->ref_frame[0] = INTRA_FRAME;
@@ -8526,10 +8528,10 @@
     if (xd->cfl.store_y) {
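-      // Perform one extra call to txfm_rd_in_plane(), with the values chosen
-      // during luma RDO, so we can store reconstructed luma values
+      // Encode the luma plane with the values chosen during luma RDO, so the
+      // reconstructed luma pixels are stored for the chroma (CfL) RD search.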
-      RD_STATS this_rd_stats;
-      txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
-                       mbmi->sb_type, mbmi->tx_size,
-                       cpi->sf.use_fast_coef_costing);
+      memcpy(x->blk_skip[0], ctx->blk_skip[0],
+             sizeof(uint8_t) * ctx->num_4x4_blk);
+      av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, AOM_PLANE_Y, 1,
+                                   mi_row, mi_col);
       xd->cfl.store_y = 0;
     }
 #endif  // CONFIG_CFL
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 984d1ca..4a9e31c 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -240,8 +240,9 @@
 }
 
 void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
-                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
-                               PICK_MODE_CONTEXT *ctx, int64_t best_rd);
+                               int mi_row, int mi_col, struct RD_STATS *rd_cost,
+                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
+                               int64_t best_rd);
 
 unsigned int av1_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
                                            const struct buf_2d *ref,