Add threshold to superblock filter level selection

Signal one bit to indicate whether the current superblock reuses the
filter level of the previous superblock.
The encoder filters with the previous filter level and computes the
resulting SSE, then searches for the best possible filter level. If the
difference between the two SSEs is less than a predefined threshold,
the current superblock reuses the previous level. Otherwise, the best
filter level is signaled.

Change-Id: Ibf125860883b774ef2464b62bb8b799b48258c64
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index ddf4e97..7db0afc 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -14,9 +14,9 @@
 #include <stdio.h>
 
 #include "aom/aom_encoder.h"
-#include "aom_dsp/bitwriter_buffer.h"
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_dsp/binary_codes_writer.h"
+#include "aom_dsp/bitwriter_buffer.h"
 #include "aom_mem/aom_mem.h"
 #include "aom_ports/mem_ops.h"
 #include "aom_ports/system_state.h"
@@ -3153,11 +3153,15 @@
 
       const uint8_t curr_lvl = curr_mbmi->filt_lvl;
       const uint8_t prev_lvl = prev_mbmi->filt_lvl;
-      const int sign = curr_lvl > prev_lvl;
-      const unsigned int delta = abs(curr_lvl - prev_lvl);
 
-      aom_write_literal(w, delta, LPF_DELTA_BITS);
-      if (delta) aom_write_literal(w, sign, 1);
+      aom_write_literal(w, curr_lvl == prev_lvl, 1);
+      if (curr_lvl != prev_lvl) {
+        const int sign = curr_lvl > prev_lvl;
+        const unsigned int delta = abs(curr_lvl - prev_lvl);
+
+        aom_write_literal(w, delta, LPF_DELTA_BITS);
+        if (delta) aom_write_literal(w, sign, 1);
+      }
     }
   }
 #endif
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index 185bc55..c5c052d 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -125,6 +125,7 @@
   filt_err = compute_sb_y_sse(sd, cm->frame_to_show, mi_row, mi_col);
 #endif  // CONFIG_HIGHBITDEPTH
 
+  // TODO(chengchen): Copy the superblock only
   // Re-instate the unfiltered frame
   aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
 
@@ -142,11 +143,16 @@
   int filt_best = last_lvl;
   MACROBLOCK *x = &cpi->td.mb;
 
-  //  Make a copy of the unfiltered / processed recon buffer
+  // Make a copy of the unfiltered / processed recon buffer
   aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
 
+  int64_t estimate_err =
+      try_filter_frame(sd, cpi, last_lvl, partial_frame, mi_row, mi_col);
+
   int i;
   for (i = min_filter_level; i <= max_filter_level; ++i) {
+    if (i == last_lvl) continue;
+
     int64_t filt_err =
         try_filter_frame(sd, cpi, i, partial_frame, mi_row, mi_col);
     if (filt_err < best_err) {
@@ -155,6 +161,15 @@
     }
   }
 
+  // If the previous superblock's filter level performs about as well as the
+  // current best filter level, reuse the previous level so that only one bit
+  // needs to be sent to signal that the level is unchanged.
+  const int64_t threshold = 700;
+  if ((mi_row > 0 || mi_col > 0) && abs(estimate_err - best_err) < threshold) {
+    best_err = estimate_err;
+    filt_best = last_lvl;
+  }
+
   if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err);
   return filt_best;
 }