Sample selection in warped motion

Add a sample selection process to warped motion.
1. Gather more samples, including multiple rows on the top, multiple
columns on the left, and the upper-right block.
2. Sort the samples by the difference between each neighbour's MV and
the current block's MV, then trim the samples whose MV difference is
considerably larger than the rest.

Borg test result:
             avg_psnr ovr_psnr ssim
cam_lowres:  -0.241   -0.243  -0.376
lowres:      -0.104   -0.110  -0.179

The changes are guarded by the WARPED_MOTION_SORT_SAMPLES macro.

Change-Id: I2730bb31a0a3ad28215ccd16fd6da0ea8b2ed404
diff --git a/av1/common/mv.h b/av1/common/mv.h
index a6bb6b8..dabfc0e 100644
--- a/av1/common/mv.h
+++ b/av1/common/mv.h
@@ -35,6 +35,10 @@
   int32_t col;
 } MV32;
 
+#if CONFIG_WARPED_MOTION
+#define WARPED_MOTION_SORT_SAMPLES 1
+#endif  // CONFIG_WARPED_MOTION
+
 #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
 // Bits of precision used for the model
 #define WARPEDMODEL_PREC_BITS 16
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index afc78c0..fdbcebc 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -995,6 +995,262 @@
 }
 
 #if CONFIG_WARPED_MOTION
+#if WARPED_MOTION_SORT_SAMPLES
+static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref,
+                                  int *pts_mv, int global_offset_r,
+                                  int global_offset_c, int row_offset,
+                                  int sign_r, int col_offset, int sign_c) {
+  int bw = block_size_wide[mbmi->sb_type];  // width for mbmi's block size
+  int bh = block_size_high[mbmi->sb_type];  // height for mbmi's block size
+  int cr_offset = row_offset * MI_SIZE + sign_r * AOMMAX(bh, MI_SIZE) / 2 - 1;
+  int cc_offset = col_offset * MI_SIZE + sign_c * AOMMAX(bw, MI_SIZE) / 2 - 1;
+  int x = cc_offset + global_offset_c;  // sample column incl. global offset
+  int y = cr_offset + global_offset_r;  // sample row incl. global offset
+  // Store one sample: position, position displaced by mbmi's MV, and the MV.
+  pts[0] = (x * 8);  // x8 scaling: findSamples documents 1/8-pel samples
+  pts[1] = (y * 8);
+  pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
+  pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
+  pts_mv[0] = mbmi->mv[0].as_mv.col;  // raw MV, kept for sortSamples ranking
+  pts_mv[1] = mbmi->mv[0].as_mv.row;
+}
+
+// Sorts pts/pts_inref by |sample MV - mv| (L1 norm); pts_mv is left unsorted.
+#define TRIM_THR 16  // gap multiplier used by the trimming loop below
+int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len) {
+  int pts_mvd[SAMPLES_ARRAY_SIZE] = { 0 };
+  int i, j, k;
+  int ret = len;  // number of samples to keep; returned to the caller
+  // pts_mvd[i]: L1 distance between sample i's MV and mv.
+  for (i = 0; i < len; ++i)
+    pts_mvd[i] =
+        abs(pts_mv[2 * i] - mv->col) + abs(pts_mv[2 * i + 1] - mv->row);
+  // Insertion sort by ascending pts_mvd; pts and pts_inref pairs move along.
+  for (i = 1; i <= len - 1; ++i) {
+    for (j = 0; j < i; ++j) {
+      if (pts_mvd[j] > pts_mvd[i]) {
+        int temp, tempi, tempj, ptempi, ptempj;
+        // Save element i, shift [j, i) up by one slot, then reinsert it at j.
+        temp = pts_mvd[i];
+        tempi = pts[2 * i];
+        tempj = pts[2 * i + 1];
+        ptempi = pts_inref[2 * i];
+        ptempj = pts_inref[2 * i + 1];
+
+        for (k = i; k > j; k--) {
+          pts_mvd[k] = pts_mvd[k - 1];
+          pts[2 * k] = pts[2 * (k - 1)];
+          pts[2 * k + 1] = pts[2 * (k - 1) + 1];
+          pts_inref[2 * k] = pts_inref[2 * (k - 1)];
+          pts_inref[2 * k + 1] = pts_inref[2 * (k - 1) + 1];
+        }
+
+        pts_mvd[j] = temp;
+        pts[2 * j] = tempi;
+        pts[2 * j + 1] = tempj;
+        pts_inref[2 * j] = ptempi;
+        pts_inref[2 * j + 1] = ptempj;
+        break;
+      }
+    }
+  }
+  // Trim: scanning from the end, the lowest i with a large enough gap wins.
+  for (i = len - 1; i >= 1; i--) {
+    int low = (i == 1) ? 1 : AOMMAX((pts_mvd[i - 1] - pts_mvd[0]) / (i - 1), 1);
+    // low: mean gap between consecutive entries 0..i-1, floored at 1.
+    if ((pts_mvd[i] - pts_mvd[i - 1]) >= TRIM_THR * low) ret = i;
+  }
+  // Never report more than LEAST_SQUARES_SAMPLES_MAX samples.
+  if (ret > LEAST_SQUARES_SAMPLES_MAX) ret = LEAST_SQUARES_SAMPLES_MAX;
+  return ret;
+}
+
+// Gathers same-ref, single-ref neighbour samples (1/8-pel); returns the count.
+int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
+                int *pts, int *pts_inref, int *pts_mv) {
+  MB_MODE_INFO *const mbmi0 = &(xd->mi[0]->mbmi);
+  int ref_frame = mbmi0->ref_frame[0];  // neighbours must match this reference
+  int up_available = xd->up_available;
+  int left_available = xd->left_available;
+  int i, mi_step = 1, np = 0, n, j, k;  // np: number of samples recorded
+  int global_offset_c = mi_col * MI_SIZE;
+  int global_offset_r = mi_row * MI_SIZE;
+
+  const TileInfo *const tile = &xd->tile;
+  // Neighbour search range in mi units: half the larger block dimension, min 1
+  int bs =
+      (AOMMAX(xd->n8_w, xd->n8_h) > 1) ? (AOMMAX(xd->n8_w, xd->n8_h) >> 1) : 1;
+  int marked[16 * 32];  // visited-mi flags; max array size for 128x128
+  int do_tl = 1;        // cleared when the top-left block need not be sampled
+  int do_tr = 1;        // cleared when the top-right block need not be sampled
+
+  // scan the above rows
+  if (up_available) {
+    for (n = 0; n < bs; n++) {
+      int mi_row_offset = -1 * (n + 1);
+
+      if (!n) {
+        MODE_INFO *mi = xd->mi[mi_row_offset * xd->mi_stride];
+        MB_MODE_INFO *mbmi = &mi->mbmi;
+        uint8_t n8_w = mi_size_wide[mbmi->sb_type];
+
+        // Handle "current block width <= above block width" case.
+        if (xd->n8_w <= n8_w) {
+          int col_offset = -mi_col % n8_w;
+          // col_offset <= 0: mi_col's distance back to a multiple of n8_w.
+          if (col_offset < 0) do_tl = 0;
+          if (col_offset + n8_w > xd->n8_w) do_tr = 0;
+
+          if (mbmi->ref_frame[0] == ref_frame &&
+              mbmi->ref_frame[1] == NONE_FRAME) {
+            record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
+                           global_offset_c, 0, -1, col_offset, 1);
+            pts += 2;
+            pts_inref += 2;
+            pts_mv += 2;
+            np++;
+          }
+          break;  // leaves the row loop: no further above rows are scanned
+        }
+      }
+
+      // Handle "current block width > above block width" case.
+      if (!n) memset(marked, 0, bs * xd->n8_w * sizeof(*marked));
+
+      for (i = 0; i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+        int mi_col_offset = i;
+        MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+        MB_MODE_INFO *mbmi = &mi->mbmi;
+        uint8_t n8_w = mi_size_wide[mbmi->sb_type];
+        uint8_t n8_h = mi_size_high[mbmi->sb_type];
+
+        mi_step = AOMMIN(xd->n8_w, n8_w);  // step over this neighbour's width
+
+        // Processed already: marked by a block recorded on a nearer row
+        if (marked[n * xd->n8_w + i]) continue;
+        // Flag the mi positions this neighbour covers (clamped to bs, n8_w).
+        for (j = 0; j < AOMMIN(bs, n8_h); j++)
+          for (k = 0; k < AOMMIN(xd->n8_w, n8_w); k++)
+            marked[(n + j) * xd->n8_w + i + k] = 1;
+
+        if (mbmi->ref_frame[0] == ref_frame &&
+            mbmi->ref_frame[1] == NONE_FRAME) {
+          record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
+                         global_offset_c, -n, -1, i, 1);
+          pts += 2;
+          pts_inref += 2;
+          pts_mv += 2;
+          np++;
+        }
+      }
+    }
+  }
+  assert(2 * np <= SAMPLES_ARRAY_SIZE);
+
+  // scan the left columns
+  if (left_available) {
+    for (n = 0; n < bs; n++) {
+      int mi_col_offset = -1 * (n + 1);
+
+      if (!n) {
+        MODE_INFO *mi = xd->mi[mi_col_offset];
+        MB_MODE_INFO *mbmi = &mi->mbmi;
+        uint8_t n8_h = mi_size_high[mbmi->sb_type];
+
+        // Handle "current block height <= left block height" case.
+        if (xd->n8_h <= n8_h) {
+          int row_offset = -mi_row % n8_h;
+          // row_offset <= 0: mi_row's distance back to a multiple of n8_h.
+          if (row_offset < 0) do_tl = 0;
+
+          if (mbmi->ref_frame[0] == ref_frame &&
+              mbmi->ref_frame[1] == NONE_FRAME) {
+            record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
+                           global_offset_c, row_offset, 1, 0, -1);
+            pts += 2;
+            pts_inref += 2;
+            pts_mv += 2;
+            np++;
+          }
+          break;  // leaves the column loop: no further left columns scanned
+        }
+      }
+
+      // Handle "current block height > left block height" case.
+      if (!n) memset(marked, 0, bs * xd->n8_h * sizeof(*marked));
+
+      for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+        int mi_row_offset = i;
+        MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+        MB_MODE_INFO *mbmi = &mi->mbmi;
+        uint8_t n8_w = mi_size_wide[mbmi->sb_type];
+        uint8_t n8_h = mi_size_high[mbmi->sb_type];
+
+        mi_step = AOMMIN(xd->n8_h, n8_h);  // step over this neighbour's height
+
+        // Processed already: marked by a block recorded on a nearer column
+        if (marked[n * xd->n8_h + i]) continue;
+        // Flag the mi positions this neighbour covers (clamped to bs, n8_h).
+        for (j = 0; j < AOMMIN(bs, n8_w); j++)
+          for (k = 0; k < AOMMIN(xd->n8_h, n8_h); k++)
+            marked[(n + j) * xd->n8_h + i + k] = 1;
+
+        if (mbmi->ref_frame[0] == ref_frame &&
+            mbmi->ref_frame[1] == NONE_FRAME) {
+          record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
+                         global_offset_c, i, 1, -n, -1);
+          pts += 2;
+          pts_inref += 2;
+          pts_mv += 2;
+          np++;
+        }
+      }
+    }
+  }
+  assert(2 * np <= SAMPLES_ARRAY_SIZE);
+
+  // Top-left block
+  if (do_tl && left_available && up_available) {
+    int mi_row_offset = -1;
+    int mi_col_offset = -1;
+
+    MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+    MB_MODE_INFO *mbmi = &mi->mbmi;
+
+    if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
+      record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
+                     global_offset_c, 0, -1, 0, -1);
+      pts += 2;
+      pts_inref += 2;
+      pts_mv += 2;
+      np++;
+    }
+  }
+  assert(2 * np <= SAMPLES_ARRAY_SIZE);
+
+  // Top-right block
+  if (do_tr && has_top_right(xd, mi_row, mi_col, AOMMAX(xd->n8_w, xd->n8_h))) {
+    POSITION trb_pos = { -1, xd->n8_w };
+
+    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, cm, &trb_pos)) {
+      int mi_row_offset = -1;
+      int mi_col_offset = xd->n8_w;
+
+      MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+      MB_MODE_INFO *mbmi = &mi->mbmi;
+
+      if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
+        record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
+                       global_offset_c, 0, -1, xd->n8_w, 1);
+        np++;  // last sample: the output pointers are not advanced
+      }
+    }
+  }
+  assert(2 * np <= SAMPLES_ARRAY_SIZE);
+
+  return np;
+}
+#else
 void calc_projection_samples(MB_MODE_INFO *const mbmi, int x, int y,
                              int *pts_inref) {
   pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
@@ -1099,4 +1355,5 @@
 
   return np;
 }
+#endif  // WARPED_MOTION_SORT_SAMPLES
 #endif  // CONFIG_WARPED_MOTION
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
index 63318f4..31e6252 100644
--- a/av1/common/mvref_common.h
+++ b/av1/common/mvref_common.h
@@ -389,8 +389,14 @@
 #endif  // CONFIG_EXT_INTER
 
 #if CONFIG_WARPED_MOTION
+#if WARPED_MOTION_SORT_SAMPLES
+int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len);
+int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
+                int *pts, int *pts_inref, int *pts_mv);
+#else
 int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
                 int *pts, int *pts_inref);
+#endif  // WARPED_MOTION_SORT_SAMPLES
 #endif  // CONFIG_WARPED_MOTION
 
 #if CONFIG_INTRABC
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index 760f1e3..845eb9a 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -28,7 +28,15 @@
 #if CONFIG_WARPED_MOTION
 #define LEAST_SQUARES_SAMPLES_MAX_BITS 3
 #define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS)
+
+#if WARPED_MOTION_SORT_SAMPLES
+// #define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
+// Worst case: half bsize of rows on the top, half bsize of columns on the left,
+// 1 upper-left block and 1 upper-right block; each sample takes 2 ints (x, y).
+#define SAMPLES_ARRAY_SIZE ((MAX_MIB_SIZE * MAX_MIB_SIZE + 2) * 2)
+#else
 #define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
+#endif  // WARPED_MOTION_SORT_SAMPLES
 
 #define DEFAULT_WMTYPE AFFINE
 #endif  // CONFIG_WARPED_MOTION
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index cb81494..b28f2d9 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -2172,6 +2172,9 @@
   int16_t mode_ctx = 0;
 #if CONFIG_WARPED_MOTION
   int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#if WARPED_MOTION_SORT_SAMPLES
+  int pts_mv[SAMPLES_ARRAY_SIZE];
+#endif  // WARPED_MOTION_SORT_SAMPLES
 #endif  // CONFIG_WARPED_MOTION
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
 
@@ -2636,7 +2639,12 @@
   mbmi->motion_mode = SIMPLE_TRANSLATION;
 #if CONFIG_WARPED_MOTION
   if (mbmi->sb_type >= BLOCK_8X8 && !has_second_ref(mbmi))
+#if WARPED_MOTION_SORT_SAMPLES
+    mbmi->num_proj_ref[0] =
+        findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
+#else
     mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
+#endif  // WARPED_MOTION_SORT_SAMPLES
 #endif  // CONFIG_WARPED_MOTION
 #if CONFIG_MOTION_VAR
   av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
@@ -2660,6 +2668,13 @@
 #if CONFIG_WARPED_MOTION
     if (mbmi->motion_mode == WARPED_CAUSAL) {
       mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
+
+#if WARPED_MOTION_SORT_SAMPLES
+      if (mbmi->num_proj_ref[0] > 1)
+        mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
+                                            pts_inref, mbmi->num_proj_ref[0]);
+#endif  // WARPED_MOTION_SORT_SAMPLES
+
       if (find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
                           mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                           &mbmi->wm_params[0], mi_row, mi_col)) {
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index b49e080..269920a 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -987,9 +987,16 @@
 }
 
 // Refine MV in a small range
+#if WARPED_MOTION_SORT_SAMPLES
+unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
+                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                  int *pts0, int *pts_inref0, int *pts_mv0,
+                                  int total_samples) {
+#else
 unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
                                   int *pts, int *pts_inref) {
+#endif  // WARPED_MOTION_SORT_SAMPLES
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MODE_INFO *mi = xd->mi[0];
@@ -1002,6 +1009,9 @@
   int16_t *tr = &mbmi->mv[0].as_mv.row;
   int16_t *tc = &mbmi->mv[0].as_mv.col;
   WarpedMotionParams best_wm_params = mbmi->wm_params[0];
+#if WARPED_MOTION_SORT_SAMPLES
+  int best_num_proj_ref = mbmi->num_proj_ref[0];
+#endif  // WARPED_MOTION_SORT_SAMPLES
   unsigned int bestmse;
   int minc, maxc, minr, maxr;
   const int start = cm->allow_high_precision_mv ? 0 : 4;
@@ -1028,6 +1038,16 @@
 
       if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) {
         MV this_mv = { *tr, *tc };
+#if WARPED_MOTION_SORT_SAMPLES
+        int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+
+        memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
+        memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
+        if (total_samples > 1)
+          mbmi->num_proj_ref[0] =
+              sortSamples(pts_mv0, &this_mv, pts, pts_inref, total_samples);
+#endif  // WARPED_MOTION_SORT_SAMPLES
+
         if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr,
                              *tc, &mbmi->wm_params[0], mi_row, mi_col)) {
           thismse =
@@ -1036,6 +1056,9 @@
           if (thismse < bestmse) {
             best_idx = idx;
             best_wm_params = mbmi->wm_params[0];
+#if WARPED_MOTION_SORT_SAMPLES
+            best_num_proj_ref = mbmi->num_proj_ref[0];
+#endif  // WARPED_MOTION_SORT_SAMPLES
             bestmse = thismse;
           }
         }
@@ -1053,7 +1076,9 @@
   *tr = br;
   *tc = bc;
   mbmi->wm_params[0] = best_wm_params;
-
+#if WARPED_MOTION_SORT_SAMPLES
+  mbmi->num_proj_ref[0] = best_num_proj_ref;
+#endif  // WARPED_MOTION_SORT_SAMPLES
   return bestmse;
 }
 #endif  // CONFIG_WARPED_MOTION
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index e013500..733e415 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -156,10 +156,18 @@
 unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi,
                                      MACROBLOCK *const x, BLOCK_SIZE bsize,
                                      int mi_row, int mi_col, const MV *this_mv);
+#if WARPED_MOTION_SORT_SAMPLES
+unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
+                                  MACROBLOCK *const x, BLOCK_SIZE bsize,
+                                  int mi_row, int mi_col, int *pts0,
+                                  int *pts_inref0, int *pts_mv0,
+                                  int total_samples);
+#else
 unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
                                   MACROBLOCK *const x, BLOCK_SIZE bsize,
                                   int mi_row, int mi_col, int *pts,
                                   int *pts_inref);
+#endif  // WARPED_MOTION_SORT_SAMPLES
 #endif  // CONFIG_WARPED_MOTION
 
 #endif  // AV1_ENCODER_MCOMP_H_
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 8cb1b97..71f8b51 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -8010,7 +8010,13 @@
 #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
 
 #if CONFIG_WARPED_MOTION
+#if WARPED_MOTION_SORT_SAMPLES
+  int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
+  int pts_mv0[SAMPLES_ARRAY_SIZE];
+  int total_samples;
+#else
   int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#endif  // WARPED_MOTION_SORT_SAMPLES
 #endif  // CONFIG_WARPED_MOTION
 
 #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
@@ -8020,7 +8026,13 @@
   if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
 #if CONFIG_WARPED_MOTION
   aom_clear_system_state();
+#if WARPED_MOTION_SORT_SAMPLES
+  mbmi->num_proj_ref[0] =
+      findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0);
+  total_samples = mbmi->num_proj_ref[0];
+#else
   mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
+#endif  // WARPED_MOTION_SORT_SAMPLES
 #if CONFIG_EXT_INTER
   best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
 #endif  // CONFIG_EXT_INTER
@@ -8116,6 +8128,9 @@
 
 #if CONFIG_WARPED_MOTION
     if (mbmi->motion_mode == WARPED_CAUSAL) {
+#if WARPED_MOTION_SORT_SAMPLES
+      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#endif  // WARPED_MOTION_SORT_SAMPLES
 #if CONFIG_EXT_INTER
       *mbmi = *best_bmc_mbmi;
       mbmi->motion_mode = WARPED_CAUSAL;
@@ -8131,6 +8146,19 @@
                                                             : cm->interp_filter;
 #endif  // CONFIG_DUAL_FILTER
 
+#if WARPED_MOTION_SORT_SAMPLES
+      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
+      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
+      // Rank the samples by motion vector difference
+      if (mbmi->num_proj_ref[0] > 1) {
+        mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
+                                            pts_inref, mbmi->num_proj_ref[0]);
+#if CONFIG_EXT_INTER
+        best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
+#endif  // CONFIG_EXT_INTER
+      }
+#endif  // WARPED_MOTION_SORT_SAMPLES
+
       if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
                            mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                            &mbmi->wm_params[0], mi_row, mi_col)) {
@@ -8139,9 +8167,16 @@
           int tmp_rate_mv = 0;
           const int_mv mv0 = mbmi->mv[0];
           WarpedMotionParams wm_params0 = mbmi->wm_params[0];
+#if WARPED_MOTION_SORT_SAMPLES
+          int num_proj_ref0 = mbmi->num_proj_ref[0];
 
           // Refine MV in a small range.
+          av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
+                               pts_mv0, total_samples);
+#else
+          // Refine MV in a small range.
           av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
+#endif  // WARPED_MOTION_SORT_SAMPLES
 
           // Keep the refined MV and WM parameters.
           if (mv0.as_int != mbmi->mv[0].as_int) {
@@ -8162,6 +8197,9 @@
               tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
             }
 #if CONFIG_EXT_INTER
+#if WARPED_MOTION_SORT_SAMPLES
+            best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
+#endif  // WARPED_MOTION_SORT_SAMPLES
             tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
 #else
             tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
@@ -8176,6 +8214,9 @@
             // Restore the old MV and WM parameters.
             mbmi->mv[0] = mv0;
             mbmi->wm_params[0] = wm_params0;
+#if WARPED_MOTION_SORT_SAMPLES
+            mbmi->num_proj_ref[0] = num_proj_ref0;
+#endif  // WARPED_MOTION_SORT_SAMPLES
           }
         }
 
@@ -11735,7 +11776,17 @@
 #if CONFIG_WARPED_MOTION
   if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
     int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+#if WARPED_MOTION_SORT_SAMPLES
+    int pts_mv[SAMPLES_ARRAY_SIZE];
+    mbmi->num_proj_ref[0] =
+        findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
+    // Rank the samples by motion vector difference
+    if (mbmi->num_proj_ref[0] > 1)
+      mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
+                                          pts_inref, mbmi->num_proj_ref[0]);
+#else
     mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
+#endif  // WARPED_MOTION_SORT_SAMPLES
   }
 #endif