AV1 RT: Implement Warped Motion mode search in NonRD

Change-Id: I94c652c6b476e53adcbd359b4fab7f577760a12e
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 429321c..658667c 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -1220,10 +1220,15 @@
   // Set motion mode related configuration.
   oxcf->motion_mode_cfg.enable_obmc = extra_cfg->enable_obmc;
   oxcf->motion_mode_cfg.enable_warped_motion = extra_cfg->enable_warped_motion;
+#if !CONFIG_REALTIME_ONLY
+  oxcf->motion_mode_cfg.allow_warped_motion =
+      (extra_cfg->allow_warped_motion & extra_cfg->enable_warped_motion);
+#else
   oxcf->motion_mode_cfg.allow_warped_motion =
       (cfg->g_usage == AOM_USAGE_REALTIME && oxcf->speed >= 7)
           ? false
           : (extra_cfg->allow_warped_motion & extra_cfg->enable_warped_motion);
+#endif
 
   // Set partition related configuration.
   part_cfg->enable_rect_partitions = extra_cfg->enable_rect_partitions;
diff --git a/av1/encoder/nonrd_pickmode.c b/av1/encoder/nonrd_pickmode.c
index c11032c..b42ff27 100644
--- a/av1/encoder/nonrd_pickmode.c
+++ b/av1/encoder/nonrd_pickmode.c
@@ -51,6 +51,9 @@
   uint8_t best_mode_skip_txfm;
   uint8_t best_mode_initial_skip_flag;
   int_interpfilters best_pred_filter;
+  MOTION_MODE best_motion_mode;
+  WarpedMotionParams wm_params;
+  int num_proj_ref;
 } BEST_PICKMODE;
 
 typedef struct {
@@ -125,6 +128,9 @@
   bp->best_mode_skip_txfm = 0;
   bp->best_mode_initial_skip_flag = 0;
   bp->best_pred = NULL;
+  bp->best_motion_mode = SIMPLE_TRANSLATION;
+  bp->num_proj_ref = 0;
+  memset(&bp->wm_params, 0, sizeof(bp->wm_params));
 }
 
 /*!\brief Runs Motion Estimation for a specific block and specific ref frame.
@@ -1602,6 +1608,199 @@
     av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
   }
 }
+#if !CONFIG_REALTIME_ONLY
+#define MOTION_MODE_SEARCH_SIZE 2
+
+AOM_INLINE static int is_warped_mode_allowed(const AV1_COMMON *cm,
+                                             MACROBLOCK *const x,
+                                             const MB_MODE_INFO *mbmi) {
+  const FeatureFlags *const features = &cm->features;
+  const MACROBLOCKD *xd = &x->e_mbd;
+
+  if (has_second_ref(mbmi)) return 0;
+  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
+
+  if (features->switchable_motion_mode) {
+    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
+    // is allowed.
+    last_motion_mode_allowed = motion_mode_allowed(
+        xd->global_motion, xd, mbmi, features->allow_warped_motion);
+  }
+
+  if (last_motion_mode_allowed == WARPED_CAUSAL) {
+    return 1;
+  }
+
+  return 0;
+}
+
+static void calc_num_proj_ref(AV1_COMP *cpi, MACROBLOCK *x, MB_MODE_INFO *mi) {
+  AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const FeatureFlags *const features = &cm->features;
+
+  mi->num_proj_ref = 1;
+  WARP_SAMPLE_INFO *const warp_sample_info =
+      &x->warp_sample_info[mi->ref_frame[0]];
+  int *pts0 = warp_sample_info->pts;
+  int *pts_inref0 = warp_sample_info->pts_inref;
+  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
+
+  if (features->switchable_motion_mode) {
+    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
+    // is allowed.
+    last_motion_mode_allowed = motion_mode_allowed(
+        xd->global_motion, xd, mi, features->allow_warped_motion);
+  }
+
+  if (last_motion_mode_allowed == WARPED_CAUSAL) {
+    if (warp_sample_info->num < 0) {
+      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
+    }
+    mi->num_proj_ref = warp_sample_info->num;
+  }
+}
+
+static void search_motion_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
+                               int mi_row, int mi_col, BLOCK_SIZE bsize,
+                               int *this_early_term, int use_model_yrd_large,
+                               int *rate_mv) {
+  AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const FeatureFlags *const features = &cm->features;
+  MB_MODE_INFO *const mi = xd->mi[0];
+  RD_STATS pf_rd_stats[MOTION_MODE_SEARCH_SIZE] = { 0 };
+  int best_skip = 0;
+  int best_early_term = 0;
+  int64_t best_cost = INT64_MAX;
+  int best_mode_index = -1;
+  const int interp_filter = features->interp_filter;
+
+  const MOTION_MODE motion_modes[MOTION_MODE_SEARCH_SIZE] = {
+    SIMPLE_TRANSLATION, WARPED_CAUSAL
+  };
+  int mode_search_size = is_warped_mode_allowed(cm, x, mi) ? 2 : 1;
+
+  WARP_SAMPLE_INFO *const warp_sample_info =
+      &x->warp_sample_info[mi->ref_frame[0]];
+  int *pts0 = warp_sample_info->pts;
+  int *pts_inref0 = warp_sample_info->pts_inref;
+
+  const int total_samples = mi->num_proj_ref;
+  if (total_samples == 0) {
+    // Do not search WARPED_CAUSAL if there are no samples to use to determine
+    // warped parameters.
+    mode_search_size = 1;
+  }
+
+  const MB_MODE_INFO base_mbmi = *mi;
+  MB_MODE_INFO best_mbmi;
+
+  for (int i = 0; i < mode_search_size; ++i) {
+    int64_t cost = INT64_MAX;
+    MOTION_MODE motion_mode = motion_modes[i];
+    *mi = base_mbmi;
+    mi->motion_mode = motion_mode;
+    if (motion_mode == SIMPLE_TRANSLATION) {
+      mi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
+
+      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
+      if (use_model_yrd_large)
+        model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
+                                  &pf_rd_stats[i], this_early_term, 1);
+      else
+        model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], 1);
+      pf_rd_stats[i].rate +=
+          av1_get_switchable_rate(x, xd, cm->features.interp_filter,
+                                  cm->seq_params->enable_dual_filter);
+      cost = RDCOST(x->rdmult, pf_rd_stats[i].rate, pf_rd_stats[i].dist);
+    } else if (motion_mode == WARPED_CAUSAL) {
+      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+      const ModeCosts *mode_costs = &x->mode_costs;
+      mi->wm_params.wmtype = DEFAULT_WMTYPE;
+      mi->interp_filters =
+          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
+
+      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
+      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
+      // Select the samples according to motion vector difference
+      if (mi->num_proj_ref > 1) {
+        mi->num_proj_ref = av1_selectSamples(&mi->mv[0].as_mv, pts, pts_inref,
+                                             mi->num_proj_ref, bsize);
+      }
+
+      // Compute the warped motion parameters with a least squares fit
+      //  using the collected samples
+      if (!av1_find_projection(mi->num_proj_ref, pts, pts_inref, bsize,
+                               mi->mv[0].as_mv.row, mi->mv[0].as_mv.col,
+                               &mi->wm_params, mi_row, mi_col)) {
+        if (mi->mode == NEWMV) {
+          const int_mv mv0 = mi->mv[0];
+          const WarpedMotionParams wm_params0 = mi->wm_params;
+          const int num_proj_ref0 = mi->num_proj_ref;
+
+          const int_mv ref_mv = av1_get_ref_mv(x, 0);
+          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
+          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
+                                            &ref_mv.as_mv, NULL);
+
+          // Refine MV in a small range.
+          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
+                               total_samples);
+          if (mi->mv[0].as_int == ref_mv.as_int) {
+            continue;
+          }
+
+          if (mv0.as_int != mi->mv[0].as_int) {
+            // Keep the refined MV and WM parameters.
+            int tmp_rate_mv = av1_mv_bit_cost(
+                &mi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
+                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
+            *rate_mv = tmp_rate_mv;
+          } else {
+            // Restore the old MV and WM parameters.
+            mi->mv[0] = mv0;
+            mi->wm_params = wm_params0;
+            mi->num_proj_ref = num_proj_ref0;
+          }
+        }
+        // Build the warped predictor
+        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
+                                      av1_num_planes(cm) - 1);
+        if (use_model_yrd_large)
+          model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
+                                    &pf_rd_stats[i], this_early_term, 1);
+        else
+          model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], 1);
+
+        pf_rd_stats[i].rate +=
+            mode_costs->motion_mode_cost[bsize][mi->motion_mode];
+        cost = RDCOST(x->rdmult, pf_rd_stats[i].rate, pf_rd_stats[i].dist);
+      } else {
+        cost = INT64_MAX;
+      }
+    }
+    if (cost < best_cost) {
+      best_mode_index = i;
+      best_cost = cost;
+      best_skip = pf_rd_stats[i].skip_txfm;
+      best_early_term = *this_early_term;
+      best_mbmi = *mi;
+    }
+  }
+  assert(best_mode_index >= 0 && best_mode_index < FILTER_SEARCH_SIZE);
+
+  *mi = best_mbmi;
+  this_rdc->rate = pf_rd_stats[best_mode_index].rate;
+  this_rdc->dist = pf_rd_stats[best_mode_index].dist;
+  this_rdc->sse = pf_rd_stats[best_mode_index].sse;
+  this_rdc->skip_txfm = (best_skip || best_early_term);
+  *this_early_term = best_early_term;
+  if (best_mode_index < FILTER_SEARCH_SIZE - 1) {
+    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
+  }
+}
+#endif  // !CONFIG_REALTIME_ONLY
 
 #define COLLECT_PICK_MODE_STAT 0
 
@@ -2150,7 +2349,8 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mi = xd->mi[0];
   struct macroblockd_plane *const pd = &xd->plane[0];
-
+  const InterpFilter filter_ref = cm->features.interp_filter;
+  const InterpFilter default_interp_filter = EIGHTTAP_REGULAR;
   BEST_PICKMODE best_pickmode;
 #if COLLECT_PICK_MODE_STAT
   static mode_search_stat ms_stat;
@@ -2163,7 +2363,6 @@
   const unsigned char segment_id = mi->segment_id;
   const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
   const int *const rd_thresh_freq_fact = x->thresh_freq_fact[bsize];
-  const InterpFilter filter_ref = cm->features.interp_filter;
   int best_early_term = 0;
   unsigned int ref_costs_single[REF_FRAMES],
       ref_costs_comp[REF_FRAMES][REF_FRAMES];
@@ -2195,7 +2394,6 @@
 #if COLLECT_PICK_MODE_STAT
   aom_usec_timer_start(&ms_stat.timer2);
 #endif
-  const InterpFilter default_interp_filter = EIGHTTAP_REGULAR;
   int64_t thresh_sad_pred = INT64_MAX;
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
@@ -2247,6 +2445,10 @@
   av1_invalid_rd_stats(&best_rdc);
   av1_invalid_rd_stats(&this_rdc);
   av1_invalid_rd_stats(rd_cost);
+  for (int i = 0; i < REF_FRAMES; ++i) {
+    x->warp_sample_info[i].num = -1;
+  }
+
   mi->bsize = bsize;
   mi->ref_frame[0] = NONE_FRAME;
   mi->ref_frame[1] = NONE_FRAME;
@@ -2517,6 +2719,12 @@
         y_sad = x->pred_mv1_sad[LAST_FRAME];
       set_color_sensitivity(cpi, x, xd, bsize, y_sad, x->source_variance);
     }
+    mi->motion_mode = SIMPLE_TRANSLATION;
+#if !CONFIG_REALTIME_ONLY
+    if (cpi->oxcf.motion_mode_cfg.allow_warped_motion) {
+      calc_num_proj_ref(cpi, x, mi);
+    }
+#endif
 
     if (enable_filter_search && !force_mv_inter_layer && !comp_pred &&
         ((mi->mv[0].as_mv.row & 0x07) || (mi->mv[0].as_mv.col & 0x07)) &&
@@ -2524,6 +2732,15 @@
       search_filter_ref(cpi, x, &this_rdc, mi_row, mi_col, tmp, bsize,
                         reuse_inter_pred, &this_mode_pred, &this_early_term,
                         use_model_yrd_large);
+#if !CONFIG_REALTIME_ONLY
+    } else if (cpi->oxcf.motion_mode_cfg.allow_warped_motion &&
+               this_mode == NEWMV) {
+      search_motion_mode(cpi, x, &this_rdc, mi_row, mi_col, bsize,
+                         &this_early_term, use_model_yrd_large, &rate_mv);
+      if (this_mode == NEWMV) {
+        frame_mv[this_mode][ref_frame] = mi->mv[0];
+      }
+#endif
     } else {
       mi->interp_filters =
           (filter_ref == SWITCHABLE)
@@ -2643,6 +2860,9 @@
       best_rdc = this_rdc;
       best_early_term = this_early_term;
       best_pickmode.best_mode = this_mode;
+      best_pickmode.best_motion_mode = mi->motion_mode;
+      best_pickmode.wm_params = mi->wm_params;
+      best_pickmode.num_proj_ref = mi->num_proj_ref;
       best_pickmode.best_pred_filter = mi->interp_filters;
       best_pickmode.best_tx_size = mi->tx_size;
       best_pickmode.best_ref_frame = ref_frame;
@@ -2665,6 +2885,9 @@
   }
 
   mi->mode = best_pickmode.best_mode;
+  mi->motion_mode = best_pickmode.best_motion_mode;
+  mi->wm_params = best_pickmode.wm_params;
+  mi->num_proj_ref = best_pickmode.num_proj_ref;
   mi->interp_filters = best_pickmode.best_pred_filter;
   mi->tx_size = best_pickmode.best_tx_size;
   memset(mi->inter_tx_size, mi->tx_size, sizeof(mi->inter_tx_size));
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 0deefd6..23c8396 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -1461,8 +1461,13 @@
     sf->rt_sf.estimate_motion_for_var_based_partition = 1;
     sf->rt_sf.short_circuit_low_temp_var = 1;
 #if !CONFIG_AV1_TEMPORAL_DENOISING
+#if !CONFIG_REALTIME_ONLY
+    sf->rt_sf.reuse_inter_pred_nonrd =
+        (cpi->oxcf.motion_mode_cfg.enable_warped_motion == 0);
+#else
     sf->rt_sf.reuse_inter_pred_nonrd = 1;
 #endif
+#endif
     sf->rt_sf.use_nonrd_altref_frame = 0;
     sf->rt_sf.nonrd_prune_ref_frame_search = 2;
     sf->rt_sf.nonrd_check_partition_merge_mode = 0;