Remove sample sorting in warped motion sample selection

The original sample selection process involves finding best 8 sorted
samples according to motion vector difference(MVD) between neighbor
block and current block, and then trimming samples. To reduce the
complexity, use the current block width/height as the MVD threshold,
and trim the samples without sorting.

This gives slightly less gain than the original method.
AWCY result:
         PSNR   PSNR HVS   SSIM
Average  -0.07   -0.13     -0.12
Borg test result:
             avg_psnr ovr_psnr ssim
cam_lowres:  -0.112   -0.112  -0.180
lowres:      -0.068   -0.073  -0.125

Change-Id: Ic2f79a170441d5bcb04ea87dddf490ef7fbba8bc
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index 953e14b..0ca9b1f 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -1601,9 +1601,9 @@
 
 #if CONFIG_EXT_WARPED_MOTION
 static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref,
-                                  int *pts_mv, int global_offset_r,
-                                  int global_offset_c, int row_offset,
-                                  int sign_r, int col_offset, int sign_c) {
+                                  int global_offset_r, int global_offset_c,
+                                  int row_offset, int sign_r, int col_offset,
+                                  int sign_c) {
   int bw = block_size_wide[mbmi->sb_type];
   int bh = block_size_high[mbmi->sb_type];
   int cr_offset = row_offset * MI_SIZE + sign_r * AOMMAX(bh, MI_SIZE) / 2 - 1;
@@ -1615,56 +1615,48 @@
   pts[1] = (y * 8);
   pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
   pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
-  pts_mv[0] = mbmi->mv[0].as_mv.col;
-  pts_mv[1] = mbmi->mv[0].as_mv.row;
 }
 
-// Only sort pts and pts_inref, and pts_mv is not sorted.
-#define TRIM_THR 16
-int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len) {
+// Select samples according to the motion vector difference.
+int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize) {
+  const int bw = block_size_wide[bsize];
+  const int bh = block_size_high[bsize];
+  const int thresh = clamp(AOMMAX(bw, bh), 16, 112);
   int pts_mvd[SAMPLES_ARRAY_SIZE] = { 0 };
-  int i, j, k;
-  int ret = len;
-  assert(len <= SAMPLES_MAX);
+  int i, j, k, l = len;
+  int ret = 0;
+  assert(len <= LEAST_SQUARES_SAMPLES_MAX);
 
-  for (i = 0; i < len; ++i)
-    pts_mvd[i] =
-        abs(pts_mv[2 * i] - mv->col) + abs(pts_mv[2 * i + 1] - mv->row);
+  // Obtain the motion vector difference.
+  for (i = 0; i < len; ++i) {
+    pts_mvd[i] = abs(pts_inref[2 * i] - pts[2 * i] - mv->col) +
+                 abs(pts_inref[2 * i + 1] - pts[2 * i + 1] - mv->row);
 
-  // TODO(yunqingwang): len is capped at 8 after this. Thus, modify this to find
-  // the best 8 samples instead of sorting all samples.
-  for (i = 1; i <= len - 1; ++i) {
-    for (j = 0; j < i; ++j) {
-      if (pts_mvd[j] > pts_mvd[i]) {
-        int temp, tempi, tempj, ptempi, ptempj;
-
-        temp = pts_mvd[i];
-        tempi = pts[2 * i];
-        tempj = pts[2 * i + 1];
-        ptempi = pts_inref[2 * i];
-        ptempj = pts_inref[2 * i + 1];
-
-        for (k = i; k > j; k--) {
-          pts_mvd[k] = pts_mvd[k - 1];
-          pts[2 * k] = pts[2 * (k - 1)];
-          pts[2 * k + 1] = pts[2 * (k - 1) + 1];
-          pts_inref[2 * k] = pts_inref[2 * (k - 1)];
-          pts_inref[2 * k + 1] = pts_inref[2 * (k - 1) + 1];
-        }
-
-        pts_mvd[j] = temp;
-        pts[2 * j] = tempi;
-        pts[2 * j + 1] = tempj;
-        pts_inref[2 * j] = ptempi;
-        pts_inref[2 * j + 1] = ptempj;
-        break;
-      }
-    }
+    if (pts_mvd[i] > thresh)
+      pts_mvd[i] = -1;
+    else
+      ret++;
   }
-  len = AOMMIN(len, LEAST_SQUARES_SAMPLES_MAX);
 
-  for (i = len - 1; i >= 1; i--) {
-    if ((pts_mvd[i] - pts_mvd[i - 1]) >= TRIM_THR) ret = i;
+  // Keep at least 1 sample.
+  if (!ret) return 1;
+
+  i = 0;
+  j = l - 1;
+  for (k = 0; k < l - ret; k++) {
+    while (pts_mvd[i] != -1) i++;
+    while (pts_mvd[j] == -1) j--;
+    assert(i != j);
+    if (i > j) break;
+
+    // Replace the discarded samples;
+    pts_mvd[i] = pts_mvd[j];
+    pts[2 * i] = pts[2 * j];
+    pts[2 * i + 1] = pts[2 * j + 1];
+    pts_inref[2 * i] = pts_inref[2 * j];
+    pts_inref[2 * i + 1] = pts_inref[2 * j + 1];
+    i++;
+    j--;
   }
 
   return ret;
@@ -1672,7 +1664,7 @@
 
 // Note: Samples returned are at 1/8-pel precision
 int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
-                int *pts, int *pts_inref, int *pts_mv) {
+                int *pts, int *pts_inref) {
   MB_MODE_INFO *const mbmi0 = &(xd->mi[0]->mbmi);
   int ref_frame = mbmi0->ref_frame[0];
   int up_available = xd->up_available;
@@ -1700,13 +1692,12 @@
       if (col_offset + n8_w > xd->n8_w) do_tr = 0;
 
       if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
-        record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
-                       global_offset_c, 0, -1, col_offset, 1);
+        record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
+                       0, -1, col_offset, 1);
         pts += 2;
         pts_inref += 2;
-        pts_mv += 2;
         np++;
-        if (np >= SAMPLES_MAX) return SAMPLES_MAX;
+        if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
       }
     } else {
       // Handle "current block width > above block width" case.
@@ -1719,18 +1710,17 @@
 
         if (mbmi->ref_frame[0] == ref_frame &&
             mbmi->ref_frame[1] == NONE_FRAME) {
-          record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
-                         global_offset_c, 0, -1, i, 1);
+          record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
+                         0, -1, i, 1);
           pts += 2;
           pts_inref += 2;
-          pts_mv += 2;
           np++;
-          if (np >= SAMPLES_MAX) return SAMPLES_MAX;
+          if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
         }
       }
     }
   }
-  assert(np <= SAMPLES_MAX);
+  assert(np <= LEAST_SQUARES_SAMPLES_MAX);
 
   // scan the nearest left columns
   if (left_available) {
@@ -1747,13 +1737,12 @@
       if (row_offset < 0) do_tl = 0;
 
       if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
-        record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
-                       global_offset_c, row_offset, 1, 0, -1);
+        record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
+                       row_offset, 1, 0, -1);
         pts += 2;
         pts_inref += 2;
-        pts_mv += 2;
         np++;
-        if (np >= SAMPLES_MAX) return SAMPLES_MAX;
+        if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
       }
     } else {
       // Handle "current block height > above block height" case.
@@ -1766,18 +1755,17 @@
 
         if (mbmi->ref_frame[0] == ref_frame &&
             mbmi->ref_frame[1] == NONE_FRAME) {
-          record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
-                         global_offset_c, i, 1, 0, -1);
+          record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
+                         i, 1, 0, -1);
           pts += 2;
           pts_inref += 2;
-          pts_mv += 2;
           np++;
-          if (np >= SAMPLES_MAX) return SAMPLES_MAX;
+          if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
         }
       }
     }
   }
-  assert(np <= SAMPLES_MAX);
+  assert(np <= LEAST_SQUARES_SAMPLES_MAX);
 
   // Top-left block
   if (do_tl && left_available && up_available) {
@@ -1788,16 +1776,15 @@
     MB_MODE_INFO *mbmi = &mi->mbmi;
 
     if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
-      record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
-                     global_offset_c, 0, -1, 0, -1);
+      record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c, 0,
+                     -1, 0, -1);
       pts += 2;
       pts_inref += 2;
-      pts_mv += 2;
       np++;
-      if (np >= SAMPLES_MAX) return SAMPLES_MAX;
+      if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
     }
   }
-  assert(np <= SAMPLES_MAX);
+  assert(np <= LEAST_SQUARES_SAMPLES_MAX);
 
   // Top-right block
   if (do_tr &&
@@ -1812,14 +1799,14 @@
       MB_MODE_INFO *mbmi = &mi->mbmi;
 
       if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
-        record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
-                       global_offset_c, 0, -1, xd->n8_w, 1);
+        record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
+                       0, -1, xd->n8_w, 1);
         np++;
-        if (np >= SAMPLES_MAX) return SAMPLES_MAX;
+        if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
       }
     }
   }
-  assert(np <= SAMPLES_MAX);
+  assert(np <= LEAST_SQUARES_SAMPLES_MAX);
 
   return np;
 }
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
index b020885..764117f 100644
--- a/av1/common/mvref_common.h
+++ b/av1/common/mvref_common.h
@@ -480,13 +480,10 @@
                            int16_t *mode_context);
 
 #if CONFIG_EXT_WARPED_MOTION
-int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len);
-int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
-                int *pts, int *pts_inref, int *pts_mv);
-#else
+int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize);
+#endif  // CONFIG_EXT_WARPED_MOTION
 int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
                 int *pts, int *pts_inref);
-#endif  // CONFIG_EXT_WARPED_MOTION
 
 #if CONFIG_INTRABC
 #define INTRABC_DELAY_PIXELS 256  //  Delay of 256 pixels
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index bbd1bcf..b797294 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -27,19 +27,8 @@
 #define MAX_PARAMDIM 9
 #define LEAST_SQUARES_SAMPLES_MAX_BITS 3
 #define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS)
-
-#define WARPED_MOTION_DEBUG 0
-
-#if CONFIG_EXT_WARPED_MOTION
-// Search 1 row on the top and 1 column on the left, 1 upper-left block,
-// 1 upper-right block. In worst case, the samples are (MAX_MIB_SIZE * 2 + 2).
-// Here force number of samples within SAMPLES_MAX.
-#define SAMPLES_MAX (LEAST_SQUARES_SAMPLES_MAX * 2)
-#define SAMPLES_ARRAY_SIZE (SAMPLES_MAX * 2)
-#else
 #define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
-#endif  // CONFIG_EXT_WARPED_MOTION
-
+#define WARPED_MOTION_DEBUG 0
 #define DEFAULT_WMTYPE AFFINE
 
 extern const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8];
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 758b3f8..74e2c86 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1814,9 +1814,6 @@
   int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
   int16_t compound_inter_mode_ctx[MODE_CTX_REF_FRAMES];
   int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
-#if CONFIG_EXT_WARPED_MOTION
-  int pts_mv[SAMPLES_ARRAY_SIZE];
-#endif  // CONFIG_EXT_WARPED_MOTION
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
 
   assert(NELEMENTS(mode_2_counter) == MB_MODE_COUNT);
@@ -2111,12 +2108,7 @@
       !mbmi->skip_mode &&
 #endif  // CONFIG_EXT_SKIP
       !has_second_ref(mbmi))
-#if CONFIG_EXT_WARPED_MOTION
-    mbmi->num_proj_ref[0] =
-        findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
-#else
     mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
-#endif  // CONFIG_EXT_WARPED_MOTION
   av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
 
   if (mbmi->ref_frame[1] != INTRA_FRAME)
@@ -2223,8 +2215,8 @@
 
 #if CONFIG_EXT_WARPED_MOTION
     if (mbmi->num_proj_ref[0] > 1)
-      mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
-                                          pts_inref, mbmi->num_proj_ref[0]);
+      mbmi->num_proj_ref[0] = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
+                                            mbmi->num_proj_ref[0], bsize);
 #endif  // CONFIG_EXT_WARPED_MOTION
 
     if (find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 126d004..69a0d21 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -944,7 +944,7 @@
 #if CONFIG_EXT_WARPED_MOTION
 unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
                                   BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                  int *pts0, int *pts_inref0, int *pts_mv0,
+                                  int *pts0, int *pts_inref0,
                                   int total_samples) {
 #else
 unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
@@ -999,7 +999,7 @@
         memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
         if (total_samples > 1)
           mbmi->num_proj_ref[0] =
-              sortSamples(pts_mv0, &this_mv, pts, pts_inref, total_samples);
+              selectSamples(&this_mv, pts, pts_inref, total_samples, bsize);
 #endif  // CONFIG_EXT_WARPED_MOTION
 
         if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr,
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 87760c9..3313bf0 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -158,8 +158,7 @@
 unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
                                   MACROBLOCK *const x, BLOCK_SIZE bsize,
                                   int mi_row, int mi_col, int *pts0,
-                                  int *pts_inref0, int *pts_mv0,
-                                  int total_samples);
+                                  int *pts_inref0, int total_samples);
 #else
 unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
                                   MACROBLOCK *const x, BLOCK_SIZE bsize,
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index cfd3a89..b8f6c36 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -7361,7 +7361,6 @@
 
 #if CONFIG_EXT_WARPED_MOTION
   int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
-  int pts_mv0[SAMPLES_ARRAY_SIZE];
   int total_samples;
 #else
   int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
@@ -7372,8 +7371,7 @@
   if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
   aom_clear_system_state();
 #if CONFIG_EXT_WARPED_MOTION
-  mbmi->num_proj_ref[0] =
-      findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0);
+  mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0);
   total_samples = mbmi->num_proj_ref[0];
 #else
   mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
@@ -7435,10 +7433,10 @@
 #if CONFIG_EXT_WARPED_MOTION
       memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
       memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
-      // Rank the samples by motion vector difference
+      // Select the samples according to motion vector difference
       if (mbmi->num_proj_ref[0] > 1) {
-        mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
-                                            pts_inref, mbmi->num_proj_ref[0]);
+        mbmi->num_proj_ref[0] = selectSamples(
+            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref[0], bsize);
         best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
       }
 #endif  // CONFIG_EXT_WARPED_MOTION
@@ -7456,7 +7454,7 @@
 
           // Refine MV in a small range.
           av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
-                               pts_mv0, total_samples);
+                               total_samples);
 #else
           // Refine MV in a small range.
           av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
@@ -10611,16 +10609,12 @@
   av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
   if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
     int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
-#if CONFIG_EXT_WARPED_MOTION
-    int pts_mv[SAMPLES_ARRAY_SIZE];
-    mbmi->num_proj_ref[0] =
-        findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
-    // Rank the samples by motion vector difference
-    if (mbmi->num_proj_ref[0] > 1)
-      mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
-                                          pts_inref, mbmi->num_proj_ref[0]);
-#else
     mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
+#if CONFIG_EXT_WARPED_MOTION
+    // Select the samples according to motion vector difference
+    if (mbmi->num_proj_ref[0] > 1)
+      mbmi->num_proj_ref[0] = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
+                                            mbmi->num_proj_ref[0], bsize);
 #endif  // CONFIG_EXT_WARPED_MOTION
   }