Making deringing buffer only one row of superblocks at a time. This introduces a line buffer that holds the last three lines of each original row so that the next row can be deringed with the original input of the upper row. No change in output. Change-Id: I8fad3bc48745e9ce3e440289f453477a0c5442c0

commit: b154a24283479bf0fb5253133b8b58441510a612 [log] [tgz]
author: Jean-Marc Valin <jmvalin@mozilla.com> Fri Oct 14 13:05:14 2016 -0400
committer: Yaowu Xu <yaowu@google.com> Wed Nov 02 22:48:19 2016 +0000
tree: 2b90b3227113d3cfb1fdcd535aef714e3e6740e6
parent: ca1eb5dc58fc49a688257fc39f25b76bbe1dc35d [diff]
diff --git a/av1/common/dering.c b/av1/common/dering.c
index 285cc3e..eea20ce 100644
--- a/av1/common/dering.c
+++ b/av1/common/dering.c

@@ -119,6 +119,9 @@
   int sbr, sbc;
   int nhsb, nvsb;
   od_dering_in *src[3];
+  int16_t *linebuf[3];
+  int16_t *curr_linebuf[3];
+  int16_t *prev_linebuf[3];
   unsigned char bskip[MAX_MIB_SIZE*MAX_MIB_SIZE][2];
   int dering_count;
   int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
@@ -142,24 +145,42 @@
   }
   stride = cm->mi_cols << bsize[0];
   for (pli = 0; pli < nplanes; pli++) {
-    src[pli] = aom_malloc(sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
-    for (r = 0; r < cm->mi_rows << bsize[pli]; ++r) {
-      for (c = 0; c < cm->mi_cols << bsize[pli]; ++c) {
-#if CONFIG_AOM_HIGHBITDEPTH
-        if (cm->use_highbitdepth) {
-          src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
-              xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
-        } else {
-#endif
-          src[pli][r * stride + c] =
-              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
-#if CONFIG_AOM_HIGHBITDEPTH
-        }
-#endif
-      }
-    }
+    int i;
+    src[pli] = aom_malloc(sizeof(*src) * (MAX_MIB_SIZE*8 + OD_FILT_VBORDER) *
+                          cm->mi_cols * 8);
+    linebuf[pli] = aom_malloc(sizeof(*linebuf) * 2*OD_FILT_VBORDER * stride);
+    prev_linebuf[pli] = linebuf[pli];
+    curr_linebuf[pli] = linebuf[pli] + OD_FILT_VBORDER * stride;
+    for (i = 0; i < OD_FILT_VBORDER * stride; i++)
+      prev_linebuf[pli][i] = OD_DERING_VERY_LARGE;
   }
   for (sbr = 0; sbr < nvsb; sbr++) {
+    for (pli = 0; pli < nplanes; pli++) {
+      for (r = 0; r < (MAX_MIB_SIZE << bsize[pli]) + OD_FILT_VBORDER; r++) {
+        for (c = 0; c < cm->mi_cols << bsize[pli]; ++c) {
+  #if CONFIG_AOM_HIGHBITDEPTH
+          if (cm->use_highbitdepth) {
+            src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
+                xd->plane[pli].dst.buf)[((MAX_MIB_SIZE << bsize[pli]) * sbr + r)
+                                        * xd->plane[pli].dst.stride + c];
+          } else {
+  #endif
+            src[pli][r * stride + c] =
+                xd->plane[pli].dst.buf[((MAX_MIB_SIZE << bsize[pli]) * sbr + r)
+                                       * xd->plane[pli].dst.stride + c];
+  #if CONFIG_AOM_HIGHBITDEPTH
+          }
+  #endif
+        }
+      }
+      for (r = 0; r < OD_FILT_VBORDER; r++) {
+        for (c = 0; c < stride >> dec[pli]; c++) {
+          curr_linebuf[pli][r * stride + c] =
+              src[pli][((MAX_MIB_SIZE << bsize[pli]) - OD_FILT_VBORDER + r) *
+                       stride + c];
+        }
+      }
+    }
     for (sbc = 0; sbc < nhsb; sbc++) {
       int level;
       int nhb, nvb;
@@ -190,14 +211,25 @@
            are outside the frame. We could change the filter instead, but it would
            add special cases for any future vectorization. */
         for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
-        for (i = -OD_FILT_VBORDER * (sbr != 0);
-             i < (nvb << bsize[pli]) + OD_FILT_VBORDER * (sbr != nvsb - 1); i++) {
+        if (sbr != 0) {
+          for (i = -OD_FILT_VBORDER; i < 0; i++) {
+            for (j = -OD_FILT_HBORDER * (sbc != 0);
+                 j < (nhb << bsize[pli]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
+                 j++) {
+              in[i * OD_FILT_BSTRIDE + j] =
+                  prev_linebuf[pli][(OD_FILT_VBORDER + i) * stride +
+                                    (sbc * MAX_MIB_SIZE << bsize[pli]) + j];
+            }
+          }
+        }
+        for (i = 0;
+             i < (nvb << bsize[pli]) + OD_FILT_VBORDER * (sbr != nvsb - 1);
+             i++) {
           for (j = -OD_FILT_HBORDER * (sbc != 0);
-               j < (nhb << bsize[pli]) + OD_FILT_HBORDER * (sbc != nhsb - 1); j++) {
-            int16_t *x;
-            x = &src[pli][(sbr * stride * MAX_MIB_SIZE << bsize[pli]) +
-                          (sbc * MAX_MIB_SIZE << bsize[pli])];
-            in[i * OD_FILT_BSTRIDE + j] = x[i * stride + j];
+               j < (nhb << bsize[pli]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
+               j++) {
+            in[i * OD_FILT_BSTRIDE + j] =
+                src[pli][i * stride + (sbc * MAX_MIB_SIZE << bsize[pli]) + j];
           }
         }
         od_dering(dst, in, dec[pli], dir, pli,
@@ -224,8 +256,15 @@
 #endif
       }
     }
+    for (pli = 0; pli < nplanes; pli++) {
+      int16_t *tmp;
+      tmp = prev_linebuf[pli];
+      prev_linebuf[pli] = curr_linebuf[pli];
+      curr_linebuf[pli] = tmp;
+    }
   }
   for (pli = 0; pli < nplanes; pli++) {
     aom_free(src[pli]);
+    aom_free(linebuf[pli]);
   }
 }
commit	b154a24283479bf0fb5253133b8b58441510a612	[log] [tgz]
author	Jean-Marc Valin <jmvalin@mozilla.com>	Fri Oct 14 13:05:14 2016 -0400
committer	Yaowu Xu <yaowu@google.com>	Wed Nov 02 22:48:19 2016 +0000
tree	2b90b3227113d3cfb1fdcd535aef714e3e6740e6
parent	ca1eb5dc58fc49a688257fc39f25b76bbe1dc35d [diff]