Avoid the "initial copy" in the deringing filter

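Remove the loop in av1/common/od_dering.c that copied the entire input
into the output buffer up front. The caller in av1/encoder/pickdering.c
now copies the source region into dst itself before calling od_dering().

No change in output.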

Change-Id: I313bde67e59835f88e3b2e6079b0df2d7ed1a903
diff --git a/av1/common/od_dering.c b/av1/common/od_dering.c
index a99c86b..f19291c 100644
--- a/av1/common/od_dering.c
+++ b/av1/common/od_dering.c
@@ -334,13 +334,6 @@
       in[i * OD_FILT_BSTRIDE + j] = x[i * xstride + j];
     }
   }
-  /* Assume deringing filter is sparsely applied, so do one large copy rather
-     than small copies later if deringing is skipped. */
-  for (i = 0; i < nvb << bsize; i++) {
-    for (j = 0; j < nhb << bsize; j++) {
-      y[i * ystride + j] = in[i * OD_FILT_BSTRIDE + j];
-    }
-  }
   if (pli == 0) {
     for (bi = 0; bi < dering_count; bi++) {
       by = bskip[bi][0];
diff --git a/av1/encoder/pickdering.c b/av1/encoder/pickdering.c
index a89c34e..0c79e45 100644
--- a/av1/encoder/pickdering.c
+++ b/av1/encoder/pickdering.c
@@ -108,6 +108,13 @@
         int threshold;
         level = compute_level_from_index(best_level, gi);
         threshold = level << coeff_shift;
+        for (r = 0; r < bsize[0] * nvb; r++) {
+          for (c = 0; c < bsize[0] * nhb; c++) {
+            dst[r * MAX_MIB_SIZE * bsize[0] + c] =
+                src[(sbr * bsize[0] * MAX_MIB_SIZE + r) * stride +
+                    sbc * bsize[0] * MAX_MIB_SIZE + c];
+          }
+        }
         od_dering(dst, MAX_MIB_SIZE * bsize[0],
                   &src[sbr * stride * bsize[0] * MAX_MIB_SIZE +
                        sbc * bsize[0] * MAX_MIB_SIZE],