Make txk-sel support cfl in key frame coding

Properly store and update the luma component result during the key
frame rate-distortion optimization process, so that it is available
for chroma component prediction. The txk-sel experiment provides a
0.15% gain for key frame coding on top of lv-map/multi and all other
default experiments.

Change-Id: I694860607b318d0a84333ed928d3de85c7676623
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 2c02094..6cd32e2 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2909,6 +2909,8 @@
              sizeof(*best_txk_type) *
                  (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
 #endif
+      memcpy(ctx->blk_skip[0], x->blk_skip[0],
+             sizeof(uint8_t) * ctx->num_4x4_blk);
       *rate = this_rate;
       *rate_tokenonly = tokenonly_rd_stats.rate;
       *distortion = tokenonly_rd_stats.dist;
@@ -2926,8 +2928,6 @@
     memcpy(mbmi->txk_type, best_txk_type,
            sizeof(*best_txk_type) *
                (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
-    memcpy(ctx->blk_skip[0], x->blk_skip[0],
-           sizeof(uint8_t) * ctx->num_4x4_blk);
 #endif
     return 1;
   } else {