Support refinemv in CONFIG_REFINED_MVS_IN_TMVP

This change adds the refined MVs obtained in CONFIG_REFINEMV to the TMVP list, which addresses issue #339.

When DAMR is off: -0.06 (Y), -0.08 (U), -0.24 (V), -0.07 (YUV)

When DAMR is on: -0.01 (Y), +0.11 (U), +0.02 (V), +0.00 (YUV)
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index e00a842..435192c 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -87,34 +87,45 @@
           refined_mv.as_mv.col = mv_offset_x;
         } else {
 #endif  // CONFIG_AFFINE_REFINEMENT
-          refined_mv.as_mv.row = mi->mv[idx].as_mv.row;
-          refined_mv.as_mv.col = mi->mv[idx].as_mv.col;
+#if CONFIG_REFINEMV
+          // Refined MVs are stored per 4x4 in refinemv_subinfo, but h and
+          // w for TMVP are per 8x8, so (h<<1) and (w<<1) are used here.
+          if (mi->refinemv_flag)
+            refined_mv.as_mv =
+                xd->refinemv_subinfo[(h << 1) * MAX_MIB_SIZE + (w << 1)]
+                    .refinemv[idx]
+                    .as_mv;
+          else
+#endif  // CONFIG_REFINEMV
+            refined_mv.as_mv = mi->mv[idx].as_mv;
 #if CONFIG_AFFINE_REFINEMENT
         }
 #endif  // CONFIG_AFFINE_REFINEMENT
-        if (n == 4) {
-          // Since TMVP is stored per 8x8 unit, for refined MV with 4x4
-          // subblock, take the average of 4 refined MVs
-          refined_mv.as_mv.row +=
-              ROUND_POWER_OF_TWO_SIGNED(xd->mv_delta[0].mv[idx].as_mv.row +
-                                            xd->mv_delta[1].mv[idx].as_mv.row +
-                                            xd->mv_delta[2].mv[idx].as_mv.row +
-                                            xd->mv_delta[3].mv[idx].as_mv.row,
-                                        2 + MV_REFINE_PREC_BITS - 3);
-          refined_mv.as_mv.col +=
-              ROUND_POWER_OF_TWO_SIGNED(xd->mv_delta[0].mv[idx].as_mv.col +
-                                            xd->mv_delta[1].mv[idx].as_mv.col +
-                                            xd->mv_delta[2].mv[idx].as_mv.col +
-                                            xd->mv_delta[3].mv[idx].as_mv.col,
-                                        2 + MV_REFINE_PREC_BITS - 3);
-        } else {
-          int sbmv_stride = bw >> 3;
-          refined_mv.as_mv.row += ROUND_POWER_OF_TWO_SIGNED(
-              xd->mv_delta[h * sbmv_stride + w].mv[idx].as_mv.row,
-              MV_REFINE_PREC_BITS - 3);
-          refined_mv.as_mv.col += ROUND_POWER_OF_TWO_SIGNED(
-              xd->mv_delta[h * sbmv_stride + w].mv[idx].as_mv.col,
-              MV_REFINE_PREC_BITS - 3);
+        if (opfl_allowed_for_cur_block(cm, mi)) {
+          if (n == 4) {
+            // Since TMVP is stored per 8x8 unit, for refined MV with 4x4
+            // subblock, take the average of 4 refined MVs
+            refined_mv.as_mv.row += ROUND_POWER_OF_TWO_SIGNED(
+                xd->mv_delta[0].mv[idx].as_mv.row +
+                    xd->mv_delta[1].mv[idx].as_mv.row +
+                    xd->mv_delta[2].mv[idx].as_mv.row +
+                    xd->mv_delta[3].mv[idx].as_mv.row,
+                2 + MV_REFINE_PREC_BITS - 3);
+            refined_mv.as_mv.col += ROUND_POWER_OF_TWO_SIGNED(
+                xd->mv_delta[0].mv[idx].as_mv.col +
+                    xd->mv_delta[1].mv[idx].as_mv.col +
+                    xd->mv_delta[2].mv[idx].as_mv.col +
+                    xd->mv_delta[3].mv[idx].as_mv.col,
+                2 + MV_REFINE_PREC_BITS - 3);
+          } else {
+            int sbmv_stride = bw >> 3;
+            refined_mv.as_mv.row += ROUND_POWER_OF_TWO_SIGNED(
+                xd->mv_delta[h * sbmv_stride + w].mv[idx].as_mv.row,
+                MV_REFINE_PREC_BITS - 3);
+            refined_mv.as_mv.col += ROUND_POWER_OF_TWO_SIGNED(
+                xd->mv_delta[h * sbmv_stride + w].mv[idx].as_mv.col,
+                MV_REFINE_PREC_BITS - 3);
+          }
         }
 #if OPFL_MVS_CLAMPED
         refined_mv.as_mv.row =
@@ -282,34 +293,45 @@
             refined_mv.as_mv.col = mv_offset_x;
           } else {
 #endif  // CONFIG_AFFINE_REFINEMENT
-            refined_mv.as_mv.row = mi->mv[idx].as_mv.row;
-            refined_mv.as_mv.col = mi->mv[idx].as_mv.col;
+#if CONFIG_REFINEMV
+            // Refined MVs are stored per 4x4 in refinemv_subinfo, but h and
+            // w for TMVP are per 8x8, so (h<<1) and (w<<1) are used here.
+            if (mi->refinemv_flag)
+              refined_mv.as_mv =
+                  xd->refinemv_subinfo[(h << 1) * MAX_MIB_SIZE + (w << 1)]
+                      .refinemv[idx]
+                      .as_mv;
+            else
+#endif  // CONFIG_REFINEMV
+              refined_mv.as_mv = mi->mv[idx].as_mv;
 #if CONFIG_AFFINE_REFINEMENT
           }
 #endif  // CONFIG_AFFINE_REFINEMENT
-          if (n == 4) {
-            // Since TMVP is stored per 8x8 unit, for refined MV with 4x4
-            // subblock, take the average of 4 refined MVs
-            refined_mv.as_mv.row += ROUND_POWER_OF_TWO_SIGNED(
-                xd->mv_delta[0].mv[idx].as_mv.row +
-                    xd->mv_delta[1].mv[idx].as_mv.row +
-                    xd->mv_delta[2].mv[idx].as_mv.row +
-                    xd->mv_delta[3].mv[idx].as_mv.row,
-                2 + MV_REFINE_PREC_BITS - 3);
-            refined_mv.as_mv.col += ROUND_POWER_OF_TWO_SIGNED(
-                xd->mv_delta[0].mv[idx].as_mv.col +
-                    xd->mv_delta[1].mv[idx].as_mv.col +
-                    xd->mv_delta[2].mv[idx].as_mv.col +
-                    xd->mv_delta[3].mv[idx].as_mv.col,
-                2 + MV_REFINE_PREC_BITS - 3);
-          } else {
-            int sbmv_stride = bw >> 3;
-            refined_mv.as_mv.row += ROUND_POWER_OF_TWO_SIGNED(
-                xd->mv_delta[h * sbmv_stride + w].mv[idx].as_mv.row,
-                MV_REFINE_PREC_BITS - 3);
-            refined_mv.as_mv.col += ROUND_POWER_OF_TWO_SIGNED(
-                xd->mv_delta[h * sbmv_stride + w].mv[idx].as_mv.col,
-                MV_REFINE_PREC_BITS - 3);
+          if (opfl_allowed_for_cur_block(cm, mi)) {
+            if (n == 4) {
+              // Since TMVP is stored per 8x8 unit, for refined MV with 4x4
+              // subblock, take the average of 4 refined MVs
+              refined_mv.as_mv.row += ROUND_POWER_OF_TWO_SIGNED(
+                  xd->mv_delta[0].mv[idx].as_mv.row +
+                      xd->mv_delta[1].mv[idx].as_mv.row +
+                      xd->mv_delta[2].mv[idx].as_mv.row +
+                      xd->mv_delta[3].mv[idx].as_mv.row,
+                  2 + MV_REFINE_PREC_BITS - 3);
+              refined_mv.as_mv.col += ROUND_POWER_OF_TWO_SIGNED(
+                  xd->mv_delta[0].mv[idx].as_mv.col +
+                      xd->mv_delta[1].mv[idx].as_mv.col +
+                      xd->mv_delta[2].mv[idx].as_mv.col +
+                      xd->mv_delta[3].mv[idx].as_mv.col,
+                  2 + MV_REFINE_PREC_BITS - 3);
+            } else {
+              int sbmv_stride = bw >> 3;
+              refined_mv.as_mv.row += ROUND_POWER_OF_TWO_SIGNED(
+                  xd->mv_delta[h * sbmv_stride + w].mv[idx].as_mv.row,
+                  MV_REFINE_PREC_BITS - 3);
+              refined_mv.as_mv.col += ROUND_POWER_OF_TWO_SIGNED(
+                  xd->mv_delta[h * sbmv_stride + w].mv[idx].as_mv.col,
+                  MV_REFINE_PREC_BITS - 3);
+            }
           }
 #if OPFL_MVS_CLAMPED
           refined_mv.as_mv.row =
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 057e9a6..66f3879 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -2151,7 +2151,11 @@
   if (!frame_is_intra_only(cm) &&
       cm->seq_params.order_hint_info.enable_ref_frame_mvs) {
     MB_MODE_INFO *const mi = xd->mi[0];
-    if (opfl_allowed_for_cur_block(cm, mi)) {
+    if (opfl_allowed_for_cur_block(cm, mi)
+#if CONFIG_REFINEMV
+        || (mi->refinemv_flag && mi->interinter_comp.type == COMPOUND_AVERAGE)
+#endif  // CONFIG_REFINEMV
+    ) {
       const int bw = mi_size_wide[bsize];
       const int bh = mi_size_high[bsize];
       const int x_inside_boundary = AOMMIN(bw, cm->mi_params.mi_cols - mi_col);
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 40d6f6b..7dd4298 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -2062,7 +2062,11 @@
 #if CONFIG_REFINED_MVS_IN_TMVP
   if (!dry_run && cm->seq_params.order_hint_info.enable_ref_frame_mvs) {
     const MB_MODE_INFO *const mi = &ctx->mic;
-    if (opfl_allowed_for_cur_block(cm, mi)) {
+    if (opfl_allowed_for_cur_block(cm, mi)
+#if CONFIG_REFINEMV
+        || (mi->refinemv_flag && mi->interinter_comp.type == COMPOUND_AVERAGE)
+#endif  // CONFIG_REFINEMV
+    ) {
       const int bw = mi_size_wide[mi->sb_type[xd->tree_type == CHROMA_PART]];
       const int bh = mi_size_high[mi->sb_type[xd->tree_type == CHROMA_PART]];
       const int x_inside_boundary = AOMMIN(bw, cm->mi_params.mi_cols - mi_col);