Compute bskip per superblock as we go instead of for the whole frame up front

Change-Id: I5681e3329ad3677296161de59f5ff1236a14f086
diff --git a/av1/common/dering.c b/av1/common/dering.c
index b0c8ca0..e3138a8 100644
--- a/av1/common/dering.c
+++ b/av1/common/dering.c
@@ -45,13 +45,37 @@
   return skip;
 }
 
+/* Copies the skip flag of each mode-info unit in the superblock at (mi_row,
+ * mi_col), clipped to the frame, into bskip (row stride MAX_MIB_SIZE).
+ * Returns 1 when every copied flag is nonzero, 0 otherwise. */
+int sb_all_skip_out(const AV1_COMMON *const cm, int mi_row, int mi_col,
+                    unsigned char *bskip) {
+  MODE_INFO **const grid = cm->mi_grid_visible;
+  int rows_left = cm->mi_rows - mi_row;
+  int cols_left = cm->mi_cols - mi_col;
+  int all_skipped = 1;
+  int y, x;
+  if (rows_left > MAX_MIB_SIZE) rows_left = MAX_MIB_SIZE;
+  if (cols_left > MAX_MIB_SIZE) cols_left = MAX_MIB_SIZE;
+  for (y = 0; y < rows_left; y++) {
+    MODE_INFO **const src = &grid[(mi_row + y) * cm->mi_stride + mi_col];
+    unsigned char *const out = &bskip[y * MAX_MIB_SIZE];
+    for (x = 0; x < cols_left; x++) {
+      const int flag = src[x]->mbmi.skip;
+      out[x] = flag;
+      all_skipped = all_skipped && flag;
+    }
+  }
+  return all_skipped;
+}
+
 void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                       MACROBLOCKD *xd, int global_level) {
   int r, c;
   int sbr, sbc;
   int nhsb, nvsb;
   od_dering_in *src[3];
-  unsigned char *bskip;
+  unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE];
   int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
   int stride;
   int bsize[3];
@@ -66,7 +90,6 @@
     nplanes = 1;
   nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
   nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
-  bskip = aom_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
   av1_setup_dst_planes(xd->plane, frame, 0, 0);
   for (pli = 0; pli < nplanes; pli++) {
     dec[pli] = xd->plane[pli].subsampling_x;
@@ -91,13 +114,6 @@
       }
     }
   }
-  for (r = 0; r < cm->mi_rows; ++r) {
-    for (c = 0; c < cm->mi_cols; ++c) {
-      const MB_MODE_INFO *mbmi =
-          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
-      bskip[r * cm->mi_cols + c] = mbmi->skip;
-    }
-  }
   for (sbr = 0; sbr < nvsb; sbr++) {
     for (sbc = 0; sbc < nhsb; sbc++) {
       int level;
@@ -108,7 +124,8 @@
           global_level, cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
                                             MAX_MIB_SIZE * sbc]
                             ->mbmi.dering_gain);
-      if (level == 0 || sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE))
+      if (level == 0 ||
+          sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip))
         continue;
       for (pli = 0; pli < nplanes; pli++) {
         int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
@@ -124,8 +141,7 @@
                   &src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
                             sbc * bsize[pli] * MAX_MIB_SIZE],
                   stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
-                  &bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
-                  cm->mi_cols, threshold, coeff_shift);
+                  bskip, MAX_MIB_SIZE, threshold, coeff_shift);
         for (r = 0; r < bsize[pli] * nvb; ++r) {
           for (c = 0; c < bsize[pli] * nhb; ++c) {
 #if CONFIG_AOM_HIGHBITDEPTH
@@ -152,5 +168,4 @@
   for (pli = 0; pli < nplanes; pli++) {
     aom_free(src[pli]);
   }
-  aom_free(bskip);
 }
diff --git a/av1/common/dering.h b/av1/common/dering.h
index 7c93f8b..77696ac 100644
--- a/av1/common/dering.h
+++ b/av1/common/dering.h
@@ -11,6 +11,8 @@
 #ifndef AV1_COMMON_DERING_H_
 #define AV1_COMMON_DERING_H_
 
+// clang-format off
+
 #include "av1/common/od_dering.h"
 #include "av1/common/onyxc_int.h"
 #include "aom/aom_integer.h"
@@ -29,6 +31,8 @@
 
 int compute_level_from_index(int global_level, int gi);
 int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
+int sb_all_skip_out(const AV1_COMMON *const cm, int mi_row, int mi_col,
+    unsigned char *bskip);
 void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                       MACROBLOCKD *xd, int global_level);
 
diff --git a/av1/encoder/pickdering.c b/av1/encoder/pickdering.c
index 4ef83cd..cf5f21e 100644
--- a/av1/encoder/pickdering.c
+++ b/av1/encoder/pickdering.c
@@ -41,7 +41,7 @@
   int nhsb, nvsb;
   od_dering_in *src;
   int16_t *ref_coeff;
-  unsigned char *bskip;
+  unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE];
   int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
   int stride;
   int bsize[3];
@@ -52,7 +52,6 @@
   int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
   src = aom_malloc(sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
   ref_coeff = aom_malloc(sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64);
-  bskip = aom_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
   av1_setup_dst_planes(xd->plane, frame, 0, 0);
   for (pli = 0; pli < 3; pli++) {
     dec[pli] = xd->plane[pli].subsampling_x;
@@ -77,13 +76,6 @@
 #endif
     }
   }
-  for (r = 0; r < cm->mi_rows; ++r) {
-    for (c = 0; c < cm->mi_cols; ++c) {
-      const MB_MODE_INFO *mbmi =
-          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
-      bskip[r * cm->mi_cols + c] = mbmi->skip;
-    }
-  }
   nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
   nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
   /* Pick a base threshold based on the quantizer. The threshold will then be
@@ -105,7 +97,8 @@
       int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
       nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
       nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
-      if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
+      if (sb_all_skip_out(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, bskip))
+        continue;
       best_gi = 0;
       for (gi = 0; gi < DERING_REFINEMENT_LEVELS; gi++) {
         int cur_mse;
@@ -117,8 +110,8 @@
                        sbc * bsize[0] * MAX_MIB_SIZE],
                   cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0,
                   dir, 0,
-                  &bskip[MAX_MIB_SIZE * sbr * cm->mi_cols + MAX_MIB_SIZE * sbc],
-                  cm->mi_cols, threshold, coeff_shift);
+                  bskip,
+                  MAX_MIB_SIZE, threshold, coeff_shift);
         cur_mse = (int)compute_dist(
             dst, MAX_MIB_SIZE * bsize[0],
             &ref_coeff[sbr * stride * bsize[0] * MAX_MIB_SIZE +
@@ -136,6 +129,5 @@
   }
   aom_free(src);
   aom_free(ref_coeff);
-  aom_free(bskip);
   return best_level;
 }