Add a new mode to pick CDEF based on QP

Add a new mode that estimates CDEF filter parameters from the quantizer.
Compared to no CDEF filtering, it achieves these coding gains (speed 8):
lowres  0.23%
midres  0.45%
hdres   0.84%

It can potentially be used for real-time mode.

Change-Id: I5127b51908733786e73b9ceddddf6b1df1199e2d
diff --git a/av1/common/cdef.h b/av1/common/cdef.h
index b1a35e8..138e1bf 100644
--- a/av1/common/cdef.h
+++ b/av1/common/cdef.h
@@ -42,7 +42,7 @@
 void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
 
 void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
-                     AV1_COMMON *cm, MACROBLOCKD *xd, int fast);
+                     AV1_COMMON *cm, MACROBLOCKD *xd, int pick_method);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 70acde9..2650bfe 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4069,7 +4069,7 @@
 #endif
     // Find CDEF parameters
     av1_cdef_search(&cm->cur_frame->buf, cpi->source, cm, xd,
-                    cpi->sf.fast_cdef_search);
+                    cpi->sf.cdef_pick_method);
 
     // Apply the filter
     av1_cdef_frame(&cm->cur_frame->buf, cm, xd);
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index cf0d1ee..c1b1056 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -300,8 +300,66 @@
 }
 
 void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
-                     AV1_COMMON *cm, MACROBLOCKD *xd, int fast) {
+                     AV1_COMMON *cm, MACROBLOCKD *xd, int pick_method) {
   CdefInfo *const cdef_info = &cm->cdef_info;
+  if (pick_method == CDEF_PICK_FROM_Q) {
+    const int bd = cm->seq_params.bit_depth;
+    const int q = av1_ac_quant_Q3(cm->base_qindex, 0, bd) >> (bd - 8);
+    cdef_info->cdef_bits = 0;
+    cdef_info->nb_cdef_strengths = 1;
+    cdef_info->cdef_pri_damping = 3 + (cm->base_qindex >> 6);
+    cdef_info->cdef_sec_damping = 3 + (cm->base_qindex >> 6);
+
+    int predicted_y_f1 = 0;
+    int predicted_y_f2 = 0;
+    int predicted_uv_f1 = 0;
+    int predicted_uv_f2 = 0;
+    aom_clear_system_state();
+    if (!frame_is_intra_only(cm)) {
+      predicted_y_f1 = clamp((int)roundf(-q * q * 0.00000235939456f +
+                                         q * 0.0068615186f + 0.02709886f),
+                             0, 15);
+      predicted_y_f2 = clamp((int)roundf(-q * q * 0.000000576297339f +
+                                         q * 0.00139933452f + 0.03831067f),
+                             0, 3);
+      predicted_uv_f1 = clamp((int)roundf(-q * q * 0.000000709506878f +
+                                          q * 0.00346288458f + 0.00887099f),
+                              0, 15);
+      predicted_uv_f2 = clamp((int)roundf(q * q * 0.000000238740853f +
+                                          q * 0.000282235851f + 0.05576307f),
+                              0, 3);
+    } else {
+      predicted_y_f1 = clamp((int)roundf(q * q * 0.00000337319739f +
+                                         q * 0.0080705937f + 0.0187634f),
+                             0, 15);
+      predicted_y_f2 = clamp((int)roundf(-q * q * -0.00000291673427f +
+                                         q * 0.00277986238f + 0.0079405f),
+                             0, 3);
+      predicted_uv_f1 = clamp((int)roundf(-q * q * 0.0000130790995f +
+                                          q * 0.0128924046f - 0.00748388f),
+                              0, 15);
+      predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000326517829f +
+                                          q * 0.000355201832f + 0.00228092f),
+                              0, 3);
+    }
+    cdef_info->cdef_strengths[0] =
+        predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2;
+    cdef_info->cdef_uv_strengths[0] =
+        predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2;
+
+    const int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+    const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+    MB_MODE_INFO **mbmi = cm->mi_grid_visible;
+    for (int r = 0; r < nvfb; ++r) {
+      for (int c = 0; c < nhfb; ++c) {
+        mbmi[MI_SIZE_64X64 * c]->cdef_strength = 0;
+      }
+      mbmi += MI_SIZE_64X64 * cm->mi_stride;
+    }
+
+    return;
+  }
+
   uint16_t *src[3];
   uint16_t *ref_coeff[3];
   static cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
@@ -313,6 +371,7 @@
   int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
   const int pri_damping = 3 + (cm->base_qindex >> 6);
   const int sec_damping = 3 + (cm->base_qindex >> 6);
+  const int fast = pick_method == CDEF_FAST_SEARCH;
   const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
   DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
   const int num_planes = av1_num_planes(cm);
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 45b6758..d3b3142 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -282,7 +282,7 @@
     sf->inter_tx_size_search_init_depth_rect = 1;
     sf->inter_tx_size_search_init_depth_sqr = 1;
 
-    sf->fast_cdef_search = 1;
+    sf->cdef_pick_method = CDEF_FAST_SEARCH;
 
     sf->adaptive_rd_thresh = 1;
     sf->mv.auto_mv_step_size = 1;
@@ -503,7 +503,7 @@
     sf->selective_ref_frame = 3;
     sf->inter_tx_size_search_init_depth_rect = 1;
     sf->inter_tx_size_search_init_depth_sqr = 1;
-    sf->fast_cdef_search = 1;
+    sf->cdef_pick_method = CDEF_FAST_SEARCH;
 
     sf->adaptive_rd_thresh = 1;
     sf->mv.auto_mv_step_size = 1;
@@ -743,6 +743,7 @@
     sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
   }
   sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
+  sf->cdef_pick_method = CDEF_FULL_SEARCH;
   sf->use_fast_coef_costing = 0;
   sf->max_intra_bsize = BLOCK_LARGEST;
   // This setting only takes effect when partition_search_type is set
@@ -758,7 +759,6 @@
   sf->ml_prune_ab_partition = 0;
   sf->ml_prune_4_partition = 0;
   sf->ml_early_term_after_part_split_level = 0;
-  sf->fast_cdef_search = 0;
   for (i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
     sf->ml_partition_search_breakout_thresh[i] = -1;  // -1 means not enabled.
   }
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 806bc08..193ef74 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -131,6 +131,12 @@
 } UENUM1BYTE(LPF_PICK_METHOD);
 
 enum {
+  CDEF_FULL_SEARCH,  // Search among all possible filters.
+  CDEF_FAST_SEARCH,  // Search among a subset of all possible filters.
+  CDEF_PICK_FROM_Q   // Estimate filter strength based on quantizer.
+} UENUM1BYTE(CDEF_PICK_METHOD);
+
+enum {
   // Terminate search early based on distortion so far compared to
   // qp step, distortion in the neighborhood of the frame, etc.
   FLAG_EARLY_TERMINATE = 1 << 0,
@@ -381,8 +387,6 @@
   // 1 - 2 increasing aggressiveness in order.
   int ml_early_term_after_part_split_level;
 
-  int fast_cdef_search;
-
   // 2-pass coding block partition search, and also use the mode decisions made
   // in the initial partition search to prune mode candidates, e.g. ref frames.
   int two_pass_partition_search;
@@ -482,6 +486,9 @@
   // This feature controls how the loop filter level is determined.
   LPF_PICK_METHOD lpf_pick;
 
+  // Control how the CDEF strength is determined.
+  CDEF_PICK_METHOD cdef_pick_method;
+
   // This feature controls whether we do the expensive context update and
   // calculation in the rd coefficient costing loop.
   int use_fast_coef_costing;