Optimize av1_loop_restoration_filter_frame

1. Avoid allocating and freeing the restoration output frame for every
   frame: keep it in cm->rst_frame and reuse it across frames.
2. Skip the unnecessary plane copies (into and back out of the output
   frame) for planes whose frame_restoration_type is RESTORE_NONE.

Decoder speedup: ~5%

Change-Id: If49027e04244174e7f34bb1bb90aa53438a82df0
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 410fd6c..f8eca2c 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -181,6 +181,8 @@
     boundaries->stripe_boundary_above = NULL;
     boundaries->stripe_boundary_below = NULL;
   }
+
+  aom_free_frame_buffer(&cm->rst_frame);
 }
 
 #if LOOP_FILTER_BITMASK
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 290918a..c44e9a2 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -437,6 +437,9 @@
   // Pointer to a scratch buffer used by self-guided restoration
   int32_t *rst_tmpbuf;
 
+  // Output of loop restoration
+  YV12_BUFFER_CONFIG rst_frame;
+
   // Flag signaling how frame contexts should be updated at the end of
   // a frame decode
   REFRESH_FRAME_CONTEXT_MODE refresh_frame_context;
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 2e81c41..64a0cf3 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -1358,11 +1358,11 @@
   static const copy_fun copy_funs[3] = { aom_yv12_copy_y, aom_yv12_copy_u,
                                          aom_yv12_copy_v };
 
-  YV12_BUFFER_CONFIG dst;
-  memset(&dst, 0, sizeof(dst));
+  YV12_BUFFER_CONFIG *dst = &cm->rst_frame;
+
   const int frame_width = frame->crop_widths[0];
   const int frame_height = frame->crop_heights[0];
-  if (aom_realloc_frame_buffer(&dst, frame_width, frame_height,
+  if (aom_realloc_frame_buffer(dst, frame_width, frame_height,
                                cm->subsampling_x, cm->subsampling_y,
                                cm->use_highbitdepth, AOM_BORDER_IN_PIXELS,
                                cm->byte_alignment, NULL, NULL, NULL) < 0)
@@ -1377,7 +1377,6 @@
     const RestorationInfo *rsi = &cm->rst_info[plane];
     RestorationType rtype = rsi->frame_restoration_type;
     if (rtype == RESTORE_NONE) {
-      copy_funs[plane](frame, &dst);
       continue;
     }
 
@@ -1398,19 +1397,16 @@
     ctxt.highbd = highbd;
     ctxt.bit_depth = bit_depth;
     ctxt.data8 = frame->buffers[plane];
-    ctxt.dst8 = dst.buffers[plane];
+    ctxt.dst8 = dst->buffers[plane];
     ctxt.data_stride = frame->strides[is_uv];
-    ctxt.dst_stride = dst.strides[is_uv];
+    ctxt.dst_stride = dst->strides[is_uv];
     ctxt.tmpbuf = cm->rst_tmpbuf;
 
     av1_foreach_rest_unit_in_frame(cm, plane, filter_frame_on_tile,
                                    filter_frame_on_unit, &ctxt);
-  }
 
-  for (int plane = 0; plane < num_planes; ++plane) {
-    copy_funs[plane](&dst, frame);
+    copy_funs[plane](dst, frame);
   }
-  aom_free_frame_buffer(&dst);
 }
 
 static void foreach_rest_unit_in_tile(const AV1PixelRect *tile_rect,