[CFL] RDO Loop Rework

CfL performs an extra loop iteration during luma mode selection. Recent changes have broken the extra iteration. Remove the previous approach. The new approach adds the extra iteration right before uv parameter selection.

Interesting fact: if the best luma intra mode already has worse RD performance than the best inter mode found so far (if any), then the entire chroma intra search is skipped, including the extra iteration.

Results on Subset1 (compared to 3e18e4a with CfL)

   PSNR  | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM  | MS SSIM | CIEDE 2000
 -0.3090 | -2.7271 | -2.3521 |  -0.3369 | -0.3463 | -0.3525 |    -1.1868

Change-Id: If67b0badd2c8ea25c61685483d39d622c1729b18

commit: 14fc50452d5be0c416e6f5466a90f98013d8ad80 [log] [tgz]
author: Luc Trudeau <luc@trud.ca> Fri Jun 16 12:40:29 2017 -0400
committer: Luc Trudeau <luc@trud.ca> Tue Jun 20 01:39:16 2017 +0000
tree: bd5121fdc37ae86771cb12a5ed890d0b270a7159
parent: 5f3d370b50916a9172e491ac9641084c6dba17cd [diff] [blame]
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 4695f26..34e4396 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c

@@ -3871,16 +3871,6 @@
   od_encode_rollback(&x->daala_enc, &post_buf);
 #endif  // CONFIG_PVQ
 
-#if CONFIG_CFL
-  // Perform one extra txfm_rd_in_plane() call, this time with the best value so
-  // we can store reconstructed luma values
-  RD_STATS this_rd_stats;
-  x->cfl_store_y = 1;
-  txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize,
-                   mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing);
-  x->cfl_store_y = 0;
-#endif
-
 #if CONFIG_PALETTE
   if (try_palette) {
     rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
@@ -8718,6 +8708,7 @@
                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   struct macroblockd_plane *const pd = xd->plane;
   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
   int y_skip = 0, uv_skip = 0;
@@ -8726,11 +8717,11 @@
   const int unify_bsize = CONFIG_CB4X4;
 
   ctx->skip = 0;
-  xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
-  xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
+  mbmi->ref_frame[0] = INTRA_FRAME;
+  mbmi->ref_frame[1] = NONE_FRAME;
 #if CONFIG_INTRABC
-  xd->mi[0]->mbmi.use_intrabc = 0;
-  xd->mi[0]->mbmi.mv[0].as_int = 0;
+  mbmi->use_intrabc = 0;
+  mbmi->mv[0].as_int = 0;
 #endif  // CONFIG_INTRABC
 
   const int64_t intra_yrd =
@@ -8741,9 +8732,22 @@
                                          &dist_y, &y_skip, best_rd);
 
   if (intra_yrd < best_rd) {
-    max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size]
-                                     [pd[1].subsampling_x][pd[1].subsampling_y];
-    init_sbuv_mode(&xd->mi[0]->mbmi);
+#if CONFIG_CFL
+    // Perform one extra txfm_rd_in_plane() call, this time with the best value
+    // so we can store reconstructed luma values
+    RD_STATS this_rd_stats;
+
+    x->cfl_store_y = 1;
+
+    txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
+                     mbmi->sb_type, mbmi->tx_size,
+                     cpi->sf.use_fast_coef_costing);
+
+    x->cfl_store_y = 0;
+#endif
+    max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x]
+                                     [pd[1].subsampling_y];
+    init_sbuv_mode(mbmi);
 #if CONFIG_CB4X4
     if (!x->skip_chroma_rd)
       rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
commit	14fc50452d5be0c416e6f5466a90f98013d8ad80	[log] [tgz]
author	Luc Trudeau <luc@trud.ca>	Fri Jun 16 12:40:29 2017 -0400
committer	Luc Trudeau <luc@trud.ca>	Tue Jun 20 01:39:16 2017 +0000
tree	bd5121fdc37ae86771cb12a5ed890d0b270a7159
parent	5f3d370b50916a9172e491ac9641084c6dba17cd [diff] [blame]