Add reduced filter search method for CDEF search

Introduced a level 2 search method with reduced number of
primary filters for CDEF search. This speed feature is enabled
for speed level 5.

            Encode Time
cpu-used     Reduction      Quality Loss
   5           2.33%          +0.111%

STATS_CHANGED

Change-Id: I42923e2d92b6353860ebc7d755b2fbcef7590fe1
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index e773345..e3279d5 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -21,19 +21,40 @@
 #include "av1/common/reconinter.h"
 #include "av1/encoder/encoder.h"
 
-#define REDUCED_PRI_STRENGTHS 8
-#define REDUCED_TOTAL_STRENGTHS (REDUCED_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
+#define REDUCED_PRI_STRENGTHS_LVL1 8
+#define REDUCED_PRI_STRENGTHS_LVL2 5
+
+#define REDUCED_TOTAL_STRENGTHS_LVL1 \
+  (REDUCED_PRI_STRENGTHS_LVL1 * CDEF_SEC_STRENGTHS)
+#define REDUCED_TOTAL_STRENGTHS_LVL2 \
+  (REDUCED_PRI_STRENGTHS_LVL2 * CDEF_SEC_STRENGTHS)
 #define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS)
 
-static const int priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 5, 7, 10, 13 };
+static const int priconv_lvl1[REDUCED_TOTAL_STRENGTHS_LVL1] = { 0, 1, 2,  3,
+                                                                5, 7, 10, 13 };
+static const int priconv_lvl2[REDUCED_TOTAL_STRENGTHS_LVL2] = { 0, 2, 4, 8,
+                                                                14 };
+static const int nb_cdef_strengths[CDEF_PICK_METHODS] = {
+  TOTAL_STRENGTHS, REDUCED_TOTAL_STRENGTHS_LVL1, REDUCED_TOTAL_STRENGTHS_LVL2,
+  TOTAL_STRENGTHS
+};
+
+// Get primary strength value for the given index and search method
+static INLINE int get_pri_strength(CDEF_PICK_METHOD pick_method, int pri_idx) {
+  switch (pick_method) {
+    case CDEF_FAST_SEARCH_LVL1: return priconv_lvl1[pri_idx];
+    case CDEF_FAST_SEARCH_LVL2: return priconv_lvl2[pri_idx];
+    default: assert(0 && "Invalid CDEF primary index"); return -1;
+  }
+}
 
 /* Search for the best strength to add as an option, knowing we
    already selected nb_strengths options. */
 static uint64_t search_one(int *lev, int nb_strengths,
                            uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
-                           int fast) {
+                           CDEF_PICK_METHOD pick_method) {
   uint64_t tot_mse[TOTAL_STRENGTHS];
-  const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
+  const int total_strengths = nb_cdef_strengths[pick_method];
   int i, j;
   uint64_t best_tot_mse = (uint64_t)1 << 63;
   int best_id = 0;
@@ -68,13 +89,13 @@
    already selected nb_strengths options. */
 static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
                                 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
-                                int fast) {
+                                CDEF_PICK_METHOD pick_method) {
   uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
   int i, j;
   uint64_t best_tot_mse = (uint64_t)1 << 63;
   int best_id0 = 0;
   int best_id1 = 0;
-  const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
+  const int total_strengths = nb_cdef_strengths[pick_method];
   memset(tot_mse, 0, sizeof(tot_mse));
   for (i = 0; i < sb_count; i++) {
     int gi;
@@ -117,13 +138,16 @@
 /* Search for the set of strengths that minimizes mse. */
 static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
                                       uint64_t mse[][TOTAL_STRENGTHS],
-                                      int sb_count, int fast) {
+                                      int sb_count,
+                                      CDEF_PICK_METHOD pick_method) {
   uint64_t best_tot_mse;
+  int fast = (pick_method == CDEF_FAST_SEARCH_LVL1 ||
+              pick_method == CDEF_FAST_SEARCH_LVL2);
   int i;
   best_tot_mse = (uint64_t)1 << 63;
   /* Greedy search: add one strength options at a time. */
   for (i = 0; i < nb_strengths; i++) {
-    best_tot_mse = search_one(best_lev, i, mse, sb_count, fast);
+    best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
   }
   /* Trying to refine the greedy search by reconsidering each
      already-selected option. */
@@ -132,7 +156,7 @@
       int j;
       for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
       best_tot_mse =
-          search_one(best_lev, nb_strengths - 1, mse, sb_count, fast);
+          search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
     }
   }
   return best_tot_mse;
@@ -142,14 +166,15 @@
 static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
                                            int nb_strengths,
                                            uint64_t (**mse)[TOTAL_STRENGTHS],
-                                           int sb_count, int fast) {
+                                           int sb_count,
+                                           CDEF_PICK_METHOD pick_method) {
   uint64_t best_tot_mse;
   int i;
   best_tot_mse = (uint64_t)1 << 63;
   /* Greedy search: add one strength options at a time. */
   for (i = 0; i < nb_strengths; i++) {
     best_tot_mse =
-        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast);
+        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
   }
   /* Trying to refine the greedy search by reconsidering each
      already-selected option. */
@@ -160,7 +185,7 @@
       best_lev1[j] = best_lev1[j + 1];
     }
     best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
-                                   sb_count, fast);
+                                   sb_count, pick_method);
   }
   return best_tot_mse;
 }
@@ -335,8 +360,9 @@
   const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
   int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
   const int damping = 3 + (cm->base_qindex >> 6);
-  const int fast = pick_method == CDEF_FAST_SEARCH;
-  const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
+  const int fast = (pick_method == CDEF_FAST_SEARCH_LVL1 ||
+                    pick_method == CDEF_FAST_SEARCH_LVL2);
+  const int total_strengths = nb_cdef_strengths[pick_method];
   DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
   const int num_planes = av1_num_planes(cm);
   av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
@@ -459,7 +485,7 @@
         const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
         for (int gi = 0; gi < total_strengths; gi++) {
           int pri_strength = gi / CDEF_SEC_STRENGTHS;
-          if (fast) pri_strength = priconv[pri_strength];
+          if (fast) pri_strength = get_pri_strength(pick_method, pri_strength);
           const int sec_strength = gi % CDEF_SEC_STRENGTHS;
           copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
                        src[pli], row - yoff, col - xoff, stride[pli], ysize,
@@ -493,10 +519,10 @@
     uint64_t tot_mse;
     if (num_planes > 1) {
       tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
-                                           mse, sb_count, fast);
+                                           mse, sb_count, pick_method);
     } else {
       tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
-                                      fast);
+                                      pick_method);
     }
 
     const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
@@ -536,12 +562,15 @@
     for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
       const int luma_strength = cdef_info->cdef_strengths[j];
       const int chroma_strength = cdef_info->cdef_uv_strengths[j];
-      cdef_info->cdef_strengths[j] =
-          priconv[luma_strength / CDEF_SEC_STRENGTHS] * CDEF_SEC_STRENGTHS +
-          (luma_strength % CDEF_SEC_STRENGTHS);
-      cdef_info->cdef_uv_strengths[j] =
-          priconv[chroma_strength / CDEF_SEC_STRENGTHS] * CDEF_SEC_STRENGTHS +
-          (chroma_strength % CDEF_SEC_STRENGTHS);
+      int pri_strength;
+      pri_strength =
+          get_pri_strength(pick_method, luma_strength / CDEF_SEC_STRENGTHS);
+      cdef_info->cdef_strengths[j] = pri_strength * CDEF_SEC_STRENGTHS +
+                                     (luma_strength % CDEF_SEC_STRENGTHS);
+      pri_strength =
+          get_pri_strength(pick_method, chroma_strength / CDEF_SEC_STRENGTHS);
+      cdef_info->cdef_uv_strengths[j] = pri_strength * CDEF_SEC_STRENGTHS +
+                                        (chroma_strength % CDEF_SEC_STRENGTHS);
     }
   }
 
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 6230c17..31fb20a 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -378,7 +378,7 @@
     sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
     sf->rd_sf.tx_domain_dist_thres_level = 1;
 
-    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH;
+    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
     sf->lpf_sf.dual_sgr_penalty_level = 1;
     sf->lpf_sf.enable_sgr_ep_pruning = 1;
 
@@ -563,6 +563,9 @@
     sf->inter_sf.disable_onesided_comp = 1;
 
     sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
+    sf->lpf_sf.cdef_pick_method = cm->allow_screen_content_tools
+                                      ? CDEF_FAST_SEARCH_LVL1
+                                      : CDEF_FAST_SEARCH_LVL2;
     sf->lpf_sf.disable_lr_filter = 1;
 
     sf->mv_sf.prune_mesh_search = 1;
@@ -691,7 +694,7 @@
     sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
     sf->tx_sf.model_based_prune_tx_search_level = 0;
 
-    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH;
+    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
   }
 
   if (speed >= 3) {
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index abac1a8..8f04e08 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -173,8 +173,10 @@
 
 enum {
   CDEF_FULL_SEARCH,
-  CDEF_FAST_SEARCH,  // Search among a subset of all possible filters.
-  CDEF_PICK_FROM_Q   // Estimate filter strength based on quantizer.
+  CDEF_FAST_SEARCH_LVL1,  // Search among a subset of all possible filters.
+  CDEF_FAST_SEARCH_LVL2,  // Search reduced subset of filters than Level 1.
+  CDEF_PICK_FROM_Q,       // Estimate filter strength based on quantizer.
+  CDEF_PICK_METHODS
 } UENUM1BYTE(CDEF_PICK_METHOD);
 
 enum {