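Add experiment CONFIG_CDEF_SINGLEPASS: Make CDEF single pass

When the experiment is enabled, the primary and secondary damping are
equal: the encoder derives both as 3 + (base_qindex >> 6) and signals
them with a single 2-bit field (damping - 3) instead of separate 1-bit
and 2-bit fields. The strength search in pickcdef.c replaces the
previous filter call with cdef_filter_fb(), which takes both the
primary and the secondary damping.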

Low latency, cpu-used=0:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.3162 | -0.6719 | -0.6535 |   0.0089 | -0.3890 | -0.1515 |    -0.6682

High latency, cpu-used=0:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0293 | -0.3556 | -0.5505 |   0.0684 | -0.0862 |  0.0513 |    -0.2765

Low latency, cpu-used=4:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.2248 | -0.7764 | -0.6630 |  -0.2109 | -0.3240 | -0.2532 |    -0.6980

High latency, cpu-used=4:
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.1118 | -0.5841 | -0.7406 |  -0.0463 | -0.2442 | -0.1064 |    -0.4187

Change-Id: I9ca8399c8f45489541a66f535fb3d771eb1d59ab
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 4a6fbd8..c86ef3a 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -3454,8 +3454,13 @@
 #if CONFIG_CDEF
 static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
   int i;
+#if CONFIG_CDEF_SINGLEPASS
+  aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2);
+  assert(cm->cdef_pri_damping == cm->cdef_sec_damping);
+#else
   aom_wb_write_literal(wb, cm->cdef_pri_damping - 5, 1);
   aom_wb_write_literal(wb, cm->cdef_sec_damping - 3, 2);
+#endif
   aom_wb_write_literal(wb, cm->cdef_bits, 2);
   for (i = 0; i < cm->nb_cdef_strengths; i++) {
     aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index da2370b..f3f7799 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -4206,7 +4206,7 @@
     cm->cdef_strengths[0] = 0;
     cm->nb_cdef_strengths = 1;
   } else {
-    // Find cm->dering_level, cm->clpf_strength_u and cm->clpf_strength_v
+    // Find CDEF parameters
     av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd,
                     cpi->oxcf.speed > 0);
 
diff --git a/av1/encoder/pickcdef.c b/av1/encoder/pickcdef.c
index 443e9e5..accc97e 100644
--- a/av1/encoder/pickcdef.c
+++ b/av1/encoder/pickcdef.c
@@ -68,11 +68,16 @@
                                 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
                                 int fast) {
   uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
+#if !CONFIG_CDEF_SINGLEPASS
   const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
+#endif
   int i, j;
   uint64_t best_tot_mse = (uint64_t)1 << 63;
   int best_id0 = 0;
   int best_id1 = 0;
+#if CONFIG_CDEF_SINGLEPASS
+  const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS;
+#endif
   memset(tot_mse, 0, sizeof(tot_mse));
   for (i = 0; i < sb_count; i++) {
     int gi;
@@ -305,7 +310,11 @@
   int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
   int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index));
   uint64_t(*mse[2])[TOTAL_STRENGTHS];
+#if CONFIG_CDEF_SINGLEPASS
+  int pri_damping = 3 + (cm->base_qindex >> 6);
+#else
   int pri_damping = 6;
+#endif
   int sec_damping = 3 + (cm->base_qindex >> 6);
   int i;
   int nb_strengths;
@@ -414,6 +423,17 @@
           int xsize = (nhb << mi_wide_l2[pli]) +
                       CDEF_HBORDER * (fbc != nhfb - 1) + xoff;
           sec_strength = gi % CDEF_SEC_STRENGTHS;
+#if CONFIG_CDEF_SINGLEPASS
+          copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
+                       src[pli],
+                       (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
+                       (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
+                       stride[pli], ysize, xsize);
+          cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli],
+                         dir, &dirinit, var, pli, dlist, cdef_count, threshold,
+                         sec_strength + (sec_strength == 3), pri_damping,
+                         sec_damping, coeff_shift);
+#else
           if (sec_strength == 0)
             copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
                          src[pli],
@@ -425,6 +445,7 @@
                          pli, dlist, cdef_count, threshold,
                          sec_strength + (sec_strength == 3), sec_damping,
                          pri_damping, coeff_shift, sec_strength != 0, 1);
+#endif
           curr_mse = compute_cdef_dist(
               ref_coeff[pli] +
                   (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +