Jointly optimizing deringing and clpf

We now signal joint CDEF strengths (combining the deringing and CLPF parameters into a single syntax element) and use a greedy algorithm for the strength search on the encoder side.

low-latency, cpu-used=4:

ll4-cdef@2017-03-22T03:42:10.815Z -> ll4-cdef-newsearch-var-header-newlambda-refine4@2017-03-22T15:56:46.471Z

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0792 |  0.3551 |  0.4393 |  -0.0108 | -0.1338 | -0.0141 |     0.1452

Change-Id: I619ae1c7c7d7ec04fe993cabc5773b07c3f5b201
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 3bbce0f..fdc7ac8 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2785,14 +2785,10 @@
   if (bsize == BLOCK_64X64 &&
 #endif  // CONFIG_EXT_PARTITION
              !sb_all_skip(cm, mi_row, mi_col)) {
-    if (cm->dering_bits)
+    if (cm->cdef_bits != 0)
       aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
-                               ->mbmi.dering_gain,
-                        cm->dering_bits);
-    if (cm->clpf_bits)
-      aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
-                               ->mbmi.clpf_strength,
-                        cm->clpf_bits);
+                               ->mbmi.cdef_strength,
+                        cm->cdef_bits);
   }
 #endif
 }
@@ -3496,7 +3492,11 @@
 
 #if CONFIG_CDEF
 static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
-  aom_wb_write_literal(wb, cm->dering_level, DERING_LEVEL_BITS);
+  int i;
+  aom_wb_write_literal(wb, cm->cdef_bits, 2);
+  for (i = 0; i < cm->nb_cdef_strengths; i++) {
+    aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
+  }
   aom_wb_write_literal(wb, cm->clpf_strength_u, 2);
   aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
 }