lc-dec: Add sf to disable CDEF adaptively

This patch adds speed feature 'adaptive_cdef_mode' that always
disables chroma CDEF, and disables luma CDEF when the MSE
improvement of a frame over zero filter strength falls below
a threshold.

Performance results of encoder and decoder for streams generated
with low complexity decode enabled, measured w.r.t a baseline with
CDEF disabled (--enable-cdef=0).

      Decode Time                   BD-Rate Loss(%)
cpu  Reduction(%)  avg.psnr   ssim    vmaf    vmaf_neg  uvq_1p5
 1     -4.048     -0.3578   -0.4008  -0.5414  -0.3696   -8.6267
 2     -4.164     -0.4334   -0.5844  -0.5302  -0.3774  -10.3880
 3     -4.716     -0.6492   -0.8754  -0.7096  -0.5201  -10.9057

STATS_CHANGED for low complexity decode

Change-Id: I5e21281a5bb8f97c748cb37a9169690a02c45885
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 9fad50c..7585162 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -3352,7 +3352,8 @@
   while (cdef_get_next_job(cdef_sync, cdef_search_ctx, &cur_fbr, &cur_fbc,
                            &sb_count)) {
     av1_cdef_mse_calc_block(cdef_search_ctx, error_info, cur_fbr, cur_fbc,
-                            sb_count);
+                            sb_count,
+                            thread_data->cpi->sf.lpf_sf.adaptive_cdef_mode);
   }
   error_info->setjmp = 0;
   return 1;
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index a315f6a..8ece9331 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -511,11 +511,13 @@
 //   CDEF search context.
 //   fbr: Row index in units of 64x64 block
 //   fbc: Column index in units of 64x64 block
+//   adaptive_cdef_mode: Speed feature to control CDEF adaptively.
 // Returns:
 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
                              struct aom_internal_error_info *error_info,
-                             int fbr, int fbc, int sb_count) {
+                             int fbr, int fbc, int sb_count,
+                             int adaptive_cdef_mode) {
   // TODO(aomedia:3276): Pass error_info to the low-level functions as required
   // in future to handle error propagation.
   (void)error_info;
@@ -573,6 +575,17 @@
   const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
   int dirinit = 0;
   for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
+    // To disable CDEF filter of chroma, set MSE of chroma strength index 0 to
+    // zero and all non-zero strength indices to 1, such that the joint
+    // luma-chroma strength search always chooses filter strength 0 for chroma
+    // due to least cost.
+    if (adaptive_cdef_mode > 0 && pli > 0) {
+      cdef_search_ctx->mse[pli][sb_count][0] = 0;
+      for (int gi = 1; gi < cdef_search_ctx->total_strengths; gi++)
+        cdef_search_ctx->mse[pli][sb_count][gi] = 1;
+      break;
+    }
+
     /* We avoid filtering the pixels for which some of the pixels to
     average are outside the frame. We could change the filter instead,
     but it would add special cases for any future vectorization. */
@@ -614,10 +627,12 @@
 // Inputs:
 //   cdef_search_ctx: Pointer to the structure containing parameters related to
 //   CDEF search context.
+//   adaptive_cdef_mode: Speed feature to control CDEF adaptively.
 // Returns:
 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx,
-                                struct aom_internal_error_info *error_info) {
+                                struct aom_internal_error_info *error_info,
+                                int adaptive_cdef_mode) {
   // Loop over each sb.
   for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
     for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
@@ -625,7 +640,7 @@
       if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
       // Calculate mse for each sb and store the relevant sb index.
       av1_cdef_mse_calc_block(cdef_search_ctx, error_info, fbr, fbc,
-                              cdef_search_ctx->sb_count);
+                              cdef_search_ctx->sb_count, adaptive_cdef_mode);
       cdef_search_ctx->sb_count++;
     }
   }
@@ -869,6 +884,7 @@
   const int damping = 3 + (cm->quant_params.base_qindex >> 6);
   const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                     pick_method <= CDEF_FAST_SEARCH_LVL5);
+  const int adaptive_cdef_mode = cpi->sf.lpf_sf.adaptive_cdef_mode;
   const int num_planes = av1_num_planes(cm);
   MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
 
@@ -886,7 +902,7 @@
   if (cpi->mt_info.num_workers > 1) {
     av1_cdef_mse_calc_frame_mt(cpi);
   } else {
-    cdef_mse_calc_frame(cdef_search_ctx, cm->error);
+    cdef_mse_calc_frame(cdef_search_ctx, cm->error, adaptive_cdef_mode);
   }
 
   /* Search for different number of signaling bits. */
@@ -956,9 +972,11 @@
 
   cdef_info->cdef_bits = nb_strength_bits;
   cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
+  uint64_t tot_best_mse_y = 0, tot_zero_mse_y = 0;
   for (int i = 0; i < sb_count; i++) {
     uint64_t best_mse = UINT64_MAX;
     int best_gi = 0;
+    tot_zero_mse_y += mse[0][i][0];
     for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
       uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
       if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
@@ -967,9 +985,36 @@
         best_mse = curr;
       }
     }
+    // When adaptive_cdef_mode is enabled, CDEF filter of chroma is disabled by
+    // setting the MSE at chroma strength index 0 to zero, ensuring zero filter
+    // strength always wins for chroma. Hence, best_mse reflects luma MSE only.
+    tot_best_mse_y += best_mse;
     mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
         best_gi;
   }
+
+  // Disable CDEF filter of luma if the MSE improvement over zero filter
+  // strength is below the threshold.
+  if (cm->current_frame.pyramid_level > 1 &&
+      cpi->sf.lpf_sf.adaptive_cdef_mode > 0 && sb_count > 0) {
+    const double cdef_mse_pct_imp_thresh = 4.0;
+    double luma_mse_gain_pct = 0.0;
+    // Percentage reduction in luma MSE with best CDEF strength vs zero
+    // strength.
+    luma_mse_gain_pct =
+        (tot_zero_mse_y - tot_best_mse_y) * 100.0 / tot_zero_mse_y;
+    if (luma_mse_gain_pct < cdef_mse_pct_imp_thresh) {
+      cdef_info->nb_cdef_strengths = 1;
+      cdef_info->cdef_bits = 0;
+      cdef_info->cdef_strengths[0] = 0;
+      cdef_info->cdef_uv_strengths[0] = 0;
+      for (int i = 0; i < sb_count; i++) {
+        mi_params->mi_grid_base[cdef_search_ctx->sb_index[i]]->cdef_strength =
+            0;
+      }
+    }
+  }
+
   if (fast) {
     for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
       const int luma_strength = cdef_info->cdef_strengths[j];
diff --git a/av1/encoder/pickcdef.h b/av1/encoder/pickcdef.h
index 70fc0e2..006b1d0 100644
--- a/av1/encoder/pickcdef.h
+++ b/av1/encoder/pickcdef.h
@@ -218,7 +218,8 @@
 
 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx,
                              struct aom_internal_error_info *error_info,
-                             int fbr, int fbc, int sb_count);
+                             int fbr, int fbc, int sb_count,
+                             int adaptive_cdef_mode);
 /*!\endcond */
 
 /*!\brief AV1 CDEF parameter search
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 94642c4..1f2d910 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -644,6 +644,7 @@
 
     sf->hl_sf.ref_frame_mvs_lvl = 2;
 
+    sf->lpf_sf.adaptive_cdef_mode = 1;
     sf->lpf_sf.dual_sgr_penalty_level = boosted ? 1 : 3;
     sf->lpf_sf.switchable_lr_with_bias_level = 1;
     sf->lpf_sf.skip_loop_filter_using_filt_error =
@@ -668,6 +669,7 @@
 
     sf->hl_sf.ref_frame_mvs_lvl = 1;
 
+    sf->lpf_sf.adaptive_cdef_mode = 1;
     sf->lpf_sf.skip_loop_filter_using_filt_error =
         (update_type != OVERLAY_UPDATE && update_type != INTNL_OVERLAY_UPDATE &&
          cm->current_frame.pyramid_level > 1)
@@ -2536,6 +2538,7 @@
   lpf_sf->disable_wiener_coeff_refine_search = false;
   lpf_sf->use_downsampled_wiener_stats = 0;
   lpf_sf->switchable_lr_with_bias_level = 0;
+  lpf_sf->adaptive_cdef_mode = 0;
 }
 
 static inline void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 7c75629..91b73bb 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -1615,6 +1615,13 @@
   // This speed feature is only enabled in all intra mode.
   bool zero_low_cdef_strengths;
 
+  // Decoder side speed feature for adaptive CDEF control based on MSE
+  // 0 : Enable CDEF for all planes
+  // 1 : Disable CDEF for chroma planes and disable for luma adaptively based on
+  //     current frame's pyramid level and improvement in MSE after CDEF
+  //     filtering.
+  int adaptive_cdef_mode;
+
   // Decoder side speed feature to add penalty for use of dual-sgr filters.
   // Takes values 0 - 10, 0 indicating no penalty and each additional level
   // adding a penalty of 1%