sb_all_skip_out() now computes a list of deringed blocks

No change in output

Change-Id: I9051ed6e1fbca7d80412ba2b53f7aacbc3ef70eb
diff --git a/av1/common/dering.c b/av1/common/dering.c
index e3138a8..547baf4 100644
--- a/av1/common/dering.c
+++ b/av1/common/dering.c
@@ -9,6 +9,8 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
+// clang-format off
+
 #include <string.h>
 #include <math.h>
 
@@ -46,11 +48,12 @@
 }
 
 int sb_all_skip_out(const AV1_COMMON *const cm, int mi_row, int mi_col,
-    unsigned char *bskip) {
+    unsigned char (*bskip)[2], int *count_ptr) {
   int r, c;
   int maxc, maxr;
   int skip = 1;
   MODE_INFO **grid;
+  int count=0;
   grid = cm->mi_grid_visible;
   maxc = cm->mi_cols - mi_col;
   maxr = cm->mi_rows - mi_row;
@@ -60,12 +63,15 @@
     MODE_INFO **grid_row;
     grid_row = &grid[(mi_row + r) * cm->mi_stride + mi_col];
     for (c = 0; c < maxc; c++) {
-      int tmp;
-      tmp = grid_row[c]->mbmi.skip;
-      bskip[r*MAX_MIB_SIZE + c] = tmp;
-      skip = skip && tmp;
+      if (!grid_row[c]->mbmi.skip) {
+        skip = 0;
+        bskip[count][0] = r;
+        bskip[count][1] = c;
+        count++;
+      }
     }
   }
+  *count_ptr = count;
   return skip;
 }
 
@@ -75,7 +81,8 @@
   int sbr, sbc;
   int nhsb, nvsb;
   od_dering_in *src[3];
-  unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE];
+  unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE][2];
+  int dering_count;
   int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
   int stride;
   int bsize[3];
@@ -125,7 +132,7 @@
                                             MAX_MIB_SIZE * sbc]
                             ->mbmi.dering_gain);
       if (level == 0 ||
-          sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip))
+          sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip, &dering_count))
         continue;
       for (pli = 0; pli < nplanes; pli++) {
         int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
@@ -141,7 +148,7 @@
                   &src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
                             sbc * bsize[pli] * MAX_MIB_SIZE],
                   stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
-                  bskip, MAX_MIB_SIZE, threshold, coeff_shift);
+                  bskip, dering_count, threshold, coeff_shift);
         for (r = 0; r < bsize[pli] * nvb; ++r) {
           for (c = 0; c < bsize[pli] * nhb; ++c) {
 #if CONFIG_AOM_HIGHBITDEPTH
diff --git a/av1/common/dering.h b/av1/common/dering.h
index 77696ac..c906994 100644
--- a/av1/common/dering.h
+++ b/av1/common/dering.h
@@ -32,7 +32,7 @@
 int compute_level_from_index(int global_level, int gi);
 int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
 int sb_all_skip_out(const AV1_COMMON *const cm, int mi_row, int mi_col,
-    unsigned char *bskip);
+    unsigned char (*bskip)[2], int *count_ptr);
 void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                       MACROBLOCKD *xd, int global_level);
 
diff --git a/av1/common/od_dering.c b/av1/common/od_dering.c
index 7aa704f..c15fd11 100644
--- a/av1/common/od_dering.c
+++ b/av1/common/od_dering.c
@@ -265,10 +265,11 @@
 void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
                int nhb, int nvb, int sbx, int sby, int nhsb, int nvsb, int xdec,
                int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
-               unsigned char *bskip, int skip_stride, int threshold,
+               unsigned char (*bskip)[2], int dering_count, int threshold,
                int coeff_shift) {
   int i;
   int j;
+  int bi;
   int bx;
   int by;
   int16_t inbuf[OD_DERING_INBUF_SIZE];
@@ -303,34 +304,32 @@
     }
   }
   if (pli == 0) {
-    for (by = 0; by < nvb; by++) {
-      for (bx = 0; bx < nhb; bx++) {
-        if (bskip[by * skip_stride + bx]) continue;
-        dir[by][bx] = od_dir_find8(&x[8 * by * xstride + 8 * bx], xstride,
-                                   &var[by][bx], coeff_shift);
-        /* Deringing orthogonal to the direction uses a tighter threshold
-           because we want to be conservative. We've presumably already
-           achieved some deringing, so the amount of change is expected
-           to be low. Also, since we might be filtering across an edge, we
-           want to make sure not to blur it. That being said, we might want
-           to be a little bit more aggressive on pure horizontal/vertical
-           since the ringing there tends to be directional, so it doesn't
-           get removed by the directional filtering. */
-        filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
-            &y[(by * ystride << bsize) + (bx << bsize)], ystride,
-            &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
-            od_adjust_thresh(threshold, var[by][bx]), dir[by][bx]);
-      }
+    for (bi = 0; bi < dering_count; bi++) {
+      by = bskip[bi][0];
+      bx = bskip[bi][1];
+      dir[by][bx] = od_dir_find8(&x[8 * by * xstride + 8 * bx], xstride,
+                                 &var[by][bx], coeff_shift);
+      /* Deringing orthogonal to the direction uses a tighter threshold
+         because we want to be conservative. We've presumably already
+         achieved some deringing, so the amount of change is expected
+         to be low. Also, since we might be filtering across an edge, we
+         want to make sure not to blur it. That being said, we might want
+         to be a little bit more aggressive on pure horizontal/vertical
+         since the ringing there tends to be directional, so it doesn't
+         get removed by the directional filtering. */
+      filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
+          &y[(by * ystride << bsize) + (bx << bsize)], ystride,
+          &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
+          od_adjust_thresh(threshold, var[by][bx]), dir[by][bx]);
     }
   } else {
-    for (by = 0; by < nvb; by++) {
-      for (bx = 0; bx < nhb; bx++) {
-        if (bskip[by * skip_stride + bx]) continue;
-        filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
-            &y[(by * ystride << bsize) + (bx << bsize)], ystride,
-            &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], threshold,
-            dir[by][bx]);
-      }
+    for (bi = 0; bi < dering_count; bi++) {
+      by = bskip[bi][0];
+      bx = bskip[bi][1];
+      filter2_thresh[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
+          &y[(by * ystride << bsize) + (bx << bsize)], ystride,
+          &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], threshold,
+          dir[by][bx]);
     }
   }
   for (i = 0; i < nvb << bsize; i++) {
@@ -338,13 +337,13 @@
       in[i * OD_FILT_BSTRIDE + j] = y[i * ystride + j];
     }
   }
-  for (by = 0; by < nvb; by++) {
-    for (bx = 0; bx < nhb; bx++) {
-      if (bskip[by * skip_stride + bx] || filter2_thresh[by][bx] == 0) continue;
-      (filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
-          &y[(by * ystride << bsize) + (bx << bsize)], ystride,
-          &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], filter2_thresh[by][bx],
-          dir[by][bx]);
-    }
+  for (bi = 0; bi < dering_count; bi++) {
+    by = bskip[bi][0];
+    bx = bskip[bi][1];
+    if (filter2_thresh[by][bx] == 0) continue;
+    (filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
+        &y[(by * ystride << bsize) + (bx << bsize)], ystride,
+        &in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], filter2_thresh[by][bx],
+        dir[by][bx]);
   }
 }
diff --git a/av1/common/od_dering.h b/av1/common/od_dering.h
index c64439f..d027f5b 100644
--- a/av1/common/od_dering.h
+++ b/av1/common/od_dering.h
@@ -12,6 +12,8 @@
 #if !defined(_dering_H)
 #define _dering_H (1)
 
+// clang-format off
+
 #include "odintrin.h"
 
 #if defined(DAALA_ODINTRIN)
@@ -37,7 +39,7 @@
 void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
                int nvb, int nhb, int sbx, int sby, int nhsb, int nvsb, int xdec,
                int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
-               unsigned char *bskip, int skip_stride, int threshold,
+               unsigned char (*bskip)[2], int dering_count, int threshold,
                int coeff_shift);
 int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
                                      int threshold, int dir);
diff --git a/av1/encoder/pickdering.c b/av1/encoder/pickdering.c
index cf5f21e..a89c34e 100644
--- a/av1/encoder/pickdering.c
+++ b/av1/encoder/pickdering.c
@@ -9,6 +9,8 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
+// clang-format off
+
 #include <string.h>
 #include <math.h>
 
@@ -41,7 +43,7 @@
   int nhsb, nvsb;
   od_dering_in *src;
   int16_t *ref_coeff;
-  unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE];
+  unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE][2];
   int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
   int stride;
   int bsize[3];
@@ -49,6 +51,7 @@
   int pli;
   int level;
   int best_level;
+  int dering_count;
   int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
   src = aom_malloc(sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
   ref_coeff = aom_malloc(sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64);
@@ -97,7 +100,7 @@
       int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
       nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
       nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
-      if (sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip))
+      if (sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip, &dering_count))
         continue;
       best_gi = 0;
       for (gi = 0; gi < DERING_REFINEMENT_LEVELS; gi++) {
@@ -111,7 +114,7 @@
                   cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0,
                   dir, 0,
                   bskip,
-                  MAX_MIB_SIZE, threshold, coeff_shift);
+                  dering_count, threshold, coeff_shift);
         cur_mse = (int)compute_dist(
             dst, MAX_MIB_SIZE * bsize[0],
             &ref_coeff[sbr * stride * bsize[0] * MAX_MIB_SIZE +