Enhance and refactor copying code

Modified the copying code; profiling showed better performance than
the previous implementation.

Change-Id: I41f585e0b0eac7a0deb4dec197c178e412a48db9
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index 758bf60..b89cd85 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -36,9 +36,8 @@
   int w, h;
 
   for (h = 0; h < y_mis; h++) {
-    MV_REF *const frame_mv = frame_mvs + h * frame_mvs_stride;
+    MV_REF *mv = frame_mvs;
     for (w = 0; w < x_mis; w++) {
-      MV_REF *const mv = frame_mv + w;
       mv->ref_frame[0] = mi->mbmi.ref_frame[0];
       mv->ref_frame[1] = mi->mbmi.ref_frame[1];
       mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
@@ -46,7 +45,9 @@
       // (TODO:yunqing) The following 2 lines won't be used and can be removed.
       mv->pred_mv[0].as_int = mi->mbmi.pred_mv[0].as_int;
       mv->pred_mv[1].as_int = mi->mbmi.pred_mv[1].as_int;
+      mv++;
     }
+    frame_mvs += frame_mvs_stride;
   }
 }
 
diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h
index 699c404..2248178 100644
--- a/av1/common/onyxc_int.h
+++ b/av1/common/onyxc_int.h
@@ -614,7 +614,7 @@
     aom_free(buf->mvs);
     buf->mi_rows = cm->mi_rows;
     buf->mi_cols = cm->mi_cols;
-#if CONFIG_TMV
+#if CONFIG_TMV || CONFIG_MFMV
     CHECK_MEM_ERROR(cm, buf->mvs,
                     (MV_REF *)aom_calloc(
                         ((cm->mi_rows + 1) >> 1) * ((cm->mi_cols + 1) >> 1),
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 62a0861..c00586d 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -458,8 +458,14 @@
   xd->cfl->mi_row = mi_row;
   xd->cfl->mi_col = mi_col;
 #endif
-  for (y = 0; y < y_mis; ++y)
-    for (x = !y; x < x_mis; ++x) xd->mi[y * cm->mi_stride + x] = xd->mi[0];
+
+  assert(x_mis && y_mis);
+  for (x = 1; x < x_mis; ++x) xd->mi[x] = xd->mi[0];
+  int idx = cm->mi_stride;
+  for (y = 1; y < y_mis; ++y) {
+    memcpy(&xd->mi[idx], &xd->mi[0], x_mis * sizeof(xd->mi[0]));
+    idx += cm->mi_stride;
+  }
 
   set_plane_n4(xd, bw, bh);
   set_skip_context(xd, mi_row, mi_col);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 995dca7..0377f7b 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -2814,12 +2814,13 @@
   int w, h;
 
   for (h = 0; h < y_mis; h++) {
-    MV_REF *const frame_mv = frame_mvs + h * frame_mvs_stride;
+    MV_REF *mv = frame_mvs;
     for (w = 0; w < x_mis; w++) {
-      MV_REF *const mv = frame_mv + w;
       mv->ref_frame[0] = NONE_FRAME;
       mv->ref_frame[1] = NONE_FRAME;
+      mv++;
     }
+    frame_mvs += frame_mvs_stride;
   }
 }