Add overhang to border to get better gradients

Some improvement in coding results is observed with the
warped_causal_interintra mode.
diff --git a/aom_util/debug_util.c b/aom_util/debug_util.c
index ba33088..0846b24 100644
--- a/aom_util/debug_util.c
+++ b/aom_util/debug_util.c
@@ -100,8 +100,8 @@
 static int frame_buf_idx_r = 0;
 static int frame_buf_idx_w = 0;
 #define MAX_FRAME_BUF_NUM 6
-#define MAX_FRAME_STRIDE 1280
-#define MAX_FRAME_HEIGHT 720
+#define MAX_FRAME_STRIDE 1920
+#define MAX_FRAME_HEIGHT 1080
 static uint16_t
     frame_pre[MAX_FRAME_BUF_NUM][3]
              [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT];  // prediction only
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index a1dac85..40bdfd8 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -3579,8 +3579,10 @@
 static INLINE int motion_mode_allowed(const AV1_COMMON *cm,
                                       const MACROBLOCKD *xd,
                                       const CANDIDATE_MV *ref_mv_stack,
-                                      const MB_MODE_INFO *mbmi) {
+                                      const MB_MODE_INFO *mbmi,
+                                      int ref_base_qindex) {
   (void)ref_mv_stack;
+  (void)ref_base_qindex;
   const BLOCK_SIZE bsize = mbmi->sb_type[PLANE_TYPE_Y];
   int enabled_motion_modes = cm->features.enabled_motion_modes;
 
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 5778002..ef10309 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -470,6 +470,8 @@
   MOTION_MODE motion_mode;
   /*! \brief Number of samples used by spatial warp prediction */
   uint8_t num_proj_ref;
+  /*! \brief Number of samples used by spatial warp prediction post pruning */
+  uint8_t num_proj_ref_pruned;
   /*! \brief The number of overlapped neighbors above/left for obmc/warp motion
    * mode. */
   uint8_t overlappable_neighbors[2];
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index eef878e..bf3ced3 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -2677,33 +2677,48 @@
 // motion vector mv so that the pixels are correctly aligned.
 // ref is an integer shifted buffer based on floor of the motion vector mv.
 // Note top is assumed to have alllocaton of at least (border +
-// inner_border) * bw, with top_stride being at least bw.
+// inner_border) * (bw + 2 * overhang), with top_stride being at least
+// bw + 2 * overhang.
 // Likewise left is assumed to have alllocaton of at least (border +
-// inner_border) * bh, with left_stride being at least border + inner_border.
+// inner_border) * (bh + 2 * overhang), with left_stride being at least
+// border + inner_border.
 void av1_prepare_inter_topleft(const uint16_t *ref, int ref_stride,
                                BLOCK_SIZE bsize, int border, int inner_border,
-                               MV mv, uint16_t *top, int top_stride,
-                               uint16_t *left, int left_stride, int bd) {
+                               int overhang, MV mv, uint16_t *top,
+                               int top_stride, uint16_t *left, int left_stride,
+                               int bd) {
   const int phase_x = (mv.col << 1) & 15;
   const int phase_y = (mv.row << 1) & 15;
   const int bw = block_size_wide[bsize];
   const int bh = block_size_high[bsize];
   uint16_t scratch[MAX_INTERINTRA_TOPLEFT_SIZE];
 
-  int scratch_stride = bw;
-  for (int i = -border; i < 2 + inner_border; ++i) {
-    cubic_phase_shift1(ref + i * ref_stride, 1, bw, phase_x, 1,
-                       scratch + (i + border) * scratch_stride, 1);
+  if (phase_x == 0 && phase_y == 0) {
+    for (int i = 0; i < border + inner_border; ++i) {
+      memcpy(top + i * top_stride, ref + (i - border) * ref_stride - overhang,
+             (bw + 2 * overhang) * sizeof(*top));
+    }
+    for (int i = 0; i < bh + 2 * overhang; ++i) {
+      memcpy(left + i * left_stride, ref + (i - overhang) * ref_stride - border,
+             (border + inner_border) * sizeof(*left));
+    }
+    return;
   }
-  for (int i = 0; i < bw; ++i) {
+  int scratch_stride = bw + 2 * overhang;
+  for (int i = -border; i < 2 + inner_border; ++i) {
+    cubic_phase_shift1(ref + i * ref_stride - overhang, 1, bw + 2 * overhang,
+                       phase_x, 1, scratch + (i + border) * scratch_stride, 1);
+  }
+  for (int i = 0; i < bw + 2 * overhang; ++i) {
     cubic_phase_shift2(scratch + i, scratch_stride, border + inner_border,
                        phase_y, 0, top + i, top_stride, bd);
   }
   scratch_stride = border + inner_border + 2;
   for (int i = -border; i < 2 + inner_border; ++i)
-    cubic_phase_shift1(ref + i, ref_stride, bh, phase_y, 1,
-                       scratch + i + border, scratch_stride);
-  for (int i = 0; i < bh; ++i) {
+    cubic_phase_shift1(ref + i - overhang * ref_stride, ref_stride,
+                       bh + 2 * overhang, phase_y, 1, scratch + i + border,
+                       scratch_stride);
+  for (int i = 0; i < bh + 2 * overhang; ++i) {
     cubic_phase_shift2(scratch + i * scratch_stride, 1, border + inner_border,
                        phase_x, 0, left + i * left_stride, 1, bd);
   }
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 295a09f..753688f 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -46,13 +46,16 @@
   2  // May be neded to calculate gradient.
      // This border is one the inner side so
      // the load area does not change.
-#define MAX_INTERINTRA_TOPLEFT_SIZE \
-  ((MAX_INTERINTRA_BORDER + MAX_INTERINTRA_INNER_BORDER + 2) * MAX_SB_SIZE)
+#define MAX_INTERINTRA_OVERHANG 1
+#define MAX_INTERINTRA_TOPLEFT_SIZE                            \
+  ((MAX_INTERINTRA_BORDER + MAX_INTERINTRA_INNER_BORDER + 2) * \
+   (2 * MAX_INTERINTRA_OVERHANG + MAX_SB_SIZE))
 
 void av1_prepare_inter_topleft(const uint16_t *ref, int ref_stride,
                                BLOCK_SIZE bsize, int border, int inner_border,
-                               MV mv, uint16_t *top, int top_stride,
-                               uint16_t *left, int left_stride, int bd);
+                               int overhang, MV mv, uint16_t *top,
+                               int top_stride, uint16_t *left, int left_stride,
+                               int bd);
 #endif  // CONFIG_THICK_INTERINTRA_BORDER
 
 #if !CONFIG_WEDGE_MOD_EXT
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 6f0156a..3fd82b6 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -1395,10 +1395,30 @@
 #endif  // CONFIG_TEMPORAL_GLOBAL_MV
 
 #if CONFIG_INTERINTRA_WARP
-static int get_gradient(const uint16_t *x, int stride) {
+// Note ext1 = 1 corresponds to the case when 2 prior samples are available
+// in the beginning of the sequence and so no padding is needed.
+// ext1 = 0 corresponds to the case when only 1 prior sample is available
+// in the beginning of the sequence.
+// Likewise ext2 = 1 corresponds to the case when 2 external samples are
+// available at the end of the sequence and so no padding is needed at the end.
+// ext2 = 0 corresponds to the case when only 1 external sample is available
+// at the end of the sequence.
+static int get_gradient(const uint16_t *x, int stride, int ext1, int ext2) {
   static const int16_t gradient[2] = { 88, -12 };
-  int g = (x[stride] - x[-stride]) * gradient[0] +
-          (x[2 * stride] - x[-2 * stride]) * gradient[1];
+  int g;
+  if (ext1 && ext2) {
+    g = (x[stride] - x[-stride]) * gradient[0] +
+        (x[2 * stride] - x[-2 * stride]) * gradient[1];
+  } else if (!ext1 && ext2) {
+    g = (x[stride] - x[-stride]) * gradient[0] +
+        (x[2 * stride] - 2 * x[-1 * stride] + x[0]) * gradient[1];
+  } else if (ext1 && !ext2) {
+    g = (x[stride] - x[-stride]) * gradient[0] +
+        (2 * x[1 * stride] - x[0] - x[-2 * stride]) * gradient[1];
+  } else {
+    g = (x[stride] - x[-stride]) * gradient[0] +
+        (2 * x[1 * stride] - 2 * x[-1 * stride]) * gradient[1];
+  }
   g = ROUND_POWER_OF_TWO_SIGNED(g, FILTER_BITS);
   return g;
 }
@@ -1410,6 +1430,7 @@
  * use around this reference block.
  * mv - motion vector
  */
+#define GRAD_THRESH 1
 static int find_interintra_rotzoom_int(const uint16_t *src, int src_stride,
                                        const uint16_t *ref, int ref_stride,
                                        BLOCK_SIZE bsize, MV mv,
@@ -1417,25 +1438,33 @@
                                        int mi_col, int bd) {
   const int border = MAX_INTERINTRA_BORDER;
   const int inner_border = MAX_INTERINTRA_INNER_BORDER;
+  const int overhang = MAX_INTERINTRA_OVERHANG;
 
   const int bw = block_size_wide[bsize];
   const int bh = block_size_high[bsize];
   uint16_t top[MAX_INTERINTRA_TOPLEFT_SIZE];
   uint16_t left[MAX_INTERINTRA_TOPLEFT_SIZE];
-  const int top_stride = bw;
+  const int top_stride = bw + 2 * overhang;
   const int left_stride = border + inner_border;
-  av1_prepare_inter_topleft(ref, ref_stride, bsize, border, inner_border, mv,
-                            top, top_stride, left, left_stride, bd);
+  av1_prepare_inter_topleft(ref, ref_stride, bsize, border, inner_border,
+                            overhang, mv, top, top_stride, left, left_stride,
+                            bd);
+  const int off = 1;
   int32_t A[2][2] = { { 0, 0 }, { 0, 0 } };
   int32_t B[2] = { 0, 0 };
-  for (int i = 2; i < border; ++i) {
-    for (int j = 2; j < bw - 2; ++j) {
-      const int d = (*(top + i * top_stride + j) -
+  for (int i = 1; i < border; ++i) {
+    for (int j = off; j < bw - off; ++j) {
+      const int d = (*(top + i * top_stride + j + overhang) -
                      *(src + (i - border) * src_stride + j));
       const int y = (i - border) - (bh / 2 - 1);
       const int x = j - (bw / 2 - 1);
-      const int gx = get_gradient(top + i * top_stride + j, 1);
-      const int gy = get_gradient(top + i * top_stride + j, top_stride);
+      const int gx = get_gradient(top + i * top_stride + j + overhang, 1,
+                                  j > 1 - overhang, j < bw - 2 + overhang);
+      const int gy = get_gradient(top + i * top_stride + j + overhang,
+                                  top_stride, i > 1, 1);
+#if GRAD_THRESH
+      if (abs(gx) < GRAD_THRESH && abs(gy) < GRAD_THRESH) continue;
+#endif  // GRAD_THRESH
       const int p1 = x * gx + y * gy;
       const int p2 = y * gx - x * gy;
       A[0][0] += p1 * p1;
@@ -1445,14 +1474,20 @@
       B[1] += p2 * d;
     }
   }
-  for (int i = 2; i < bh - 2; ++i) {
-    for (int j = 2; j < border; ++j) {
-      const int d = (*(left + i * left_stride + j) -
+  for (int i = off; i < bh - off; ++i) {
+    for (int j = 1; j < border; ++j) {
+      const int d = (*(left + (i + overhang) * left_stride + j) -
                      *(src + i * src_stride + j - border));
       const int y = i - (bh / 2 - 1);
       const int x = (j - border) - (bw / 2 - 1);
-      const int gx = get_gradient(left + i * left_stride + j, 1);
-      const int gy = get_gradient(left + i * left_stride + j, left_stride);
+      const int gx =
+          get_gradient(left + (i + overhang) * left_stride + j, 1, j > 1, 1);
+      const int gy =
+          get_gradient(left + (i + overhang) * left_stride + j, left_stride,
+                       i > 1 - overhang, i < bh - 2 + overhang);
+#if GRAD_THRESH
+      if (abs(gx) < GRAD_THRESH && abs(gy) < GRAD_THRESH) continue;
+#endif  // GRAD_THRESH
       const int p1 = x * gx + y * gy;
       const int p2 = y * gx - x * gy;
       A[0][0] += p1 * p1;
@@ -1506,10 +1541,9 @@
 }
 
 int av1_find_projection_interintra(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
-                                   MV mv, WarpedMotionParams *wm_params) {
+                                   const uint16_t *dst, int dst_stride, MV mv,
+                                   WarpedMotionParams *wm_params) {
   const struct macroblockd_plane *pd = &xd->plane[0];
-  const int dst_stride = pd->dst.stride;
-  uint16_t *const dst = pd->dst.buf;
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
 
@@ -1528,12 +1562,11 @@
 }
 
 int av1_find_projection_interintra_ext(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
+                                       const uint16_t *dst, int dst_stride,
                                        MV mv, WarpedMotionParams *wm_params,
                                        int width, int height,
                                        uint16_t *tmpbuf) {
   const struct macroblockd_plane *pd = &xd->plane[0];
-  const int dst_stride = pd->dst.stride;
-  uint16_t *const dst = pd->dst.buf;
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
 
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index f566cad..bd55357 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -326,8 +326,10 @@
 #endif  // CONFIG_TEMPORAL_GLOBAL_MV
 #if CONFIG_INTERINTRA_WARP
 int av1_find_projection_interintra(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
-                                   MV mv, WarpedMotionParams *wm_params);
+                                   const uint16_t *dst, int dst_stride, MV mv,
+                                   WarpedMotionParams *wm_params);
 int av1_find_projection_interintra_ext(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
+                                       const uint16_t *dst, int dst_stride,
                                        MV mv, WarpedMotionParams *wm_params,
                                        int width, int height, uint16_t *tmpbuf);
 #endif  // CONFIG_INTERINTRA_WARP
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 0a4a5ea..bdb5dd5 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -1337,9 +1337,9 @@
     mbmi->wm_params[0].wmtype = ROTZOOM;
     mbmi->wm_params[0].invalid = 0;
     MV mv = mbmi->mv[0].as_mv;
-    if (av1_find_projection_interintra_ext(xd, bsize, mv, &mbmi->wm_params[0],
-                                           cm->width, cm->height,
-                                           dcb->mc_buf[0])) {
+    if (av1_find_projection_interintra_ext(
+            xd, bsize, xd->plane[0].dst.buf, xd->plane[0].dst.stride, mv,
+            &mbmi->wm_params[0], cm->width, cm->height, dcb->mc_buf[0])) {
 #if WARPED_MOTION_DEBUG
       printf("Warning: unexpected warped model from aomenc\n");
 #endif
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 582514c..55751d6 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -449,8 +449,10 @@
 #if CONFIG_WARP_REF_LIST
   mbmi->max_num_warp_candidates = 0;
 #endif  // CONFIG_WARP_REF_LIST
+  const RefCntBuffer *const refbuf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
   const int allowed_motion_modes =
-      motion_mode_allowed(cm, xd, xd->ref_mv_stack[mbmi->ref_frame[0]], mbmi);
+      motion_mode_allowed(cm, xd, xd->ref_mv_stack[mbmi->ref_frame[0]], mbmi,
+                          refbuf ? refbuf->base_qindex : -1);
 
 #if CONFIG_WARPMV
   if (mbmi->mode == WARPMV) {
@@ -2680,6 +2682,7 @@
 #endif  // CONFIG_TIP
           !mbmi->skip_mode && !has_second_ref(mbmi)) {
         mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
+        mbmi->num_proj_ref_pruned = mbmi->num_proj_ref;
       }
       av1_count_overlappable_neighbors(cm, xd);
       mbmi->motion_mode = read_motion_mode(cm, xd, mbmi, r);
@@ -2803,6 +2806,7 @@
 #endif  // CONFIG_TIP
       !mbmi->skip_mode && !has_second_ref(mbmi)) {
     mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
+    mbmi->num_proj_ref_pruned = mbmi->num_proj_ref;
   }
 
   av1_count_overlappable_neighbors(cm, xd);
@@ -2865,6 +2869,7 @@
 #endif  // CONFIG_TIP
       !mbmi->skip_mode && !has_second_ref(mbmi)) {
     mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
+    mbmi->num_proj_ref_pruned = mbmi->num_proj_ref;
   }
   av1_count_overlappable_neighbors(cm, xd);
 
@@ -2942,12 +2947,12 @@
     MV mv = mbmi->mv[0].as_mv;
 
     if (mbmi->num_proj_ref > 1) {
-      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
-                                             mbmi->num_proj_ref, bsize);
+      mbmi->num_proj_ref_pruned = av1_selectSamples(
+          &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
     }
 
-    if (av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, mv,
-                            &mbmi->wm_params[0], mi_row, mi_col)) {
+    if (av1_find_projection(mbmi->num_proj_ref_pruned, pts, pts_inref, bsize,
+                            mv, &mbmi->wm_params[0], mi_row, mi_col)) {
 #if WARPED_MOTION_DEBUG
       printf("Warning: unexpected warped model from aomenc\n");
 #endif
@@ -3006,11 +3011,11 @@
     mbmi->wm_params.invalid = 0;
 
     if (mbmi->num_proj_ref > 1) {
-      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
-                                             mbmi->num_proj_ref, bsize);
+      mbmi->num_proj_ref_pruned = av1_selectSamples(
+          &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
     }
 
-    if (av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
+    if (av1_find_projection(mbmi->num_proj_ref_pruned, pts, pts_inref, bsize,
                             mbmi->mv[0].as_mv, &mbmi->wm_params, mi_row,
                             mi_col)) {
 #if WARPED_MOTION_DEBUG
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index eda9230..a07a261 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -498,8 +498,10 @@
     const AV1_COMMON *cm, MACROBLOCKD *xd, const MB_MODE_INFO *mbmi,
     const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame, aom_writer *w) {
   const BLOCK_SIZE bsize = mbmi->sb_type[PLANE_TYPE_Y];
+  const RefCntBuffer *const refbuf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
   const int allowed_motion_modes =
-      motion_mode_allowed(cm, xd, mbmi_ext_frame->ref_mv_stack, mbmi);
+      motion_mode_allowed(cm, xd, mbmi_ext_frame->ref_mv_stack, mbmi,
+                          refbuf ? refbuf->base_qindex : -1);
   assert((allowed_motion_modes & (1 << mbmi->motion_mode)) != 0);
   assert((cm->features.enabled_motion_modes & (1 << mbmi->motion_mode)) != 0);
 
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 8539951..601e6b5 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -5178,7 +5178,7 @@
 #else
   WarpedMotionParams best_wm_params = mbmi->wm_params;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-  int best_num_proj_ref = mbmi->num_proj_ref;
+  int best_num_proj_ref_pruned = mbmi->num_proj_ref_pruned;
   unsigned int bestmse;
   const SubpelMvLimits *mv_limits = &ms_params->mv_limits;
 
@@ -5217,16 +5217,17 @@
         memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
         memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
         if (total_samples > 1)
-          mbmi->num_proj_ref =
+          mbmi->num_proj_ref_pruned =
               av1_selectSamples(&this_mv, pts, pts_inref, total_samples, bsize);
 
 #if CONFIG_EXTENDED_WARP_PREDICTION
-        if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
-                                 this_mv, &mbmi->wm_params[0], mi_row,
+        if (!av1_find_projection(mbmi->num_proj_ref_pruned, pts, pts_inref,
+                                 bsize, this_mv, &mbmi->wm_params[0], mi_row,
                                  mi_col)) {
 #else
-        if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
-                                 this_mv, &mbmi->wm_params, mi_row, mi_col)) {
+        if (!av1_find_projection(mbmi->num_proj_ref_pruned, pts, pts_inref,
+                                 bsize, this_mv, &mbmi->wm_params, mi_row,
+                                 mi_col)) {
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
           thismse = compute_motion_cost(xd, cm, ms_params, bsize, &this_mv);
 
@@ -5237,7 +5238,7 @@
 #else
             best_wm_params = mbmi->wm_params;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-            best_num_proj_ref = mbmi->num_proj_ref;
+            best_num_proj_ref_pruned = mbmi->num_proj_ref_pruned;
             bestmse = thismse;
           }
         }
@@ -5258,16 +5259,16 @@
 #else
   mbmi->wm_params = best_wm_params;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-  mbmi->num_proj_ref = best_num_proj_ref;
+  mbmi->num_proj_ref_pruned = best_num_proj_ref_pruned;
   return bestmse;
 }
 
 #if CONFIG_INTERINTRA_WARP
 // Refines MV in a small range
 unsigned int av1_refine_warped_interintra_mv(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm,
-    const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, BLOCK_SIZE bsize,
-    WARP_SEARCH_METHOD search_method, int num_iterations) {
+    MACROBLOCKD *xd, const AV1_COMMON *const cm, const uint16_t *dst,
+    int dst_stride, const SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
+    BLOCK_SIZE bsize, WARP_SEARCH_METHOD search_method, int num_iterations) {
   MB_MODE_INFO *mbmi = xd->mi[0];
 
   const MV *neighbors = warp_search_info[search_method].neighbors;
@@ -5313,10 +5314,10 @@
                      best_mv->col + neighbors[idx].col * (1 << mv_shift) };
       if (av1_is_subpelmv_in_range(mv_limits, this_mv)) {
 #if CONFIG_EXTENDED_WARP_PREDICTION
-        if (!av1_find_projection_interintra(xd, bsize, this_mv,
+        if (!av1_find_projection_interintra(xd, bsize, dst, dst_stride, this_mv,
                                             &mbmi->wm_params[0])) {
 #else
-        if (!av1_find_projection_interintra(xd, bsize, this_mv,
+        if (!av1_find_projection_interintra(xd, bsize, dst, dst_stride, this_mv,
                                             &mbmi->wm_params)) {
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
           thismse = compute_motion_cost(xd, cm, ms_params, bsize, &this_mv);
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index da753ce..5bd9051 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -502,9 +502,9 @@
 
 #if CONFIG_INTERINTRA_WARP
 unsigned int av1_refine_warped_interintra_mv(
-    MACROBLOCKD *xd, const AV1_COMMON *const cm,
-    const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, BLOCK_SIZE bsize,
-    WARP_SEARCH_METHOD search_method, int num_iterations);
+    MACROBLOCKD *xd, const AV1_COMMON *const cm, const uint16_t *dst,
+    int dst_stride, const SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
+    BLOCK_SIZE bsize, WARP_SEARCH_METHOD search_method, int num_iterations);
 #endif  // CONFIG_INTERINTRA_WARP
 
 #if CONFIG_EXTENDED_WARP_PREDICTION
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index 410216d..e2ed6cf 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -1300,8 +1300,11 @@
       }
 #endif  // CONFIG_BAWP
 #if CONFIG_EXTENDED_WARP_PREDICTION
+      const RefCntBuffer *const refbuf =
+          get_ref_frame_buf(cm, mbmi->ref_frame[0]);
       const int allowed_motion_modes = motion_mode_allowed(
-          cm, xd, mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]], mbmi);
+          cm, xd, mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]], mbmi,
+          refbuf ? refbuf->base_qindex : -1);
       MOTION_MODE motion_mode = mbmi->motion_mode;
 
 #if CONFIG_WARPMV
@@ -1891,8 +1894,11 @@
       const int seg_ref_active = 0;
       if (!seg_ref_active && inter_block) {
 #if CONFIG_EXTENDED_WARP_PREDICTION
+        const RefCntBuffer *const refbuf =
+            get_ref_frame_buf(cm, mbmi->ref_frame[0]);
         const int allowed_motion_modes = motion_mode_allowed(
-            cm, xd, x->mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]], mbmi);
+            cm, xd, x->mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]], mbmi,
+            refbuf ? refbuf->base_qindex : -1);
         if (mbmi->motion_mode != INTERINTRA) {
           if (allowed_motion_modes & (1 << OBMC_CAUSAL)) {
             td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 78677de..a54904f 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -1666,19 +1666,17 @@
   (void)tile_data;
   av1_invalid_rd_stats(&best_rd_stats);
   aom_clear_system_state();
-  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
+  mbmi->num_proj_ref = 1;
 #if CONFIG_WARP_REF_LIST
   mbmi->warp_ref_idx = 0;
   mbmi->max_num_warp_candidates = 0;
 #endif  // CONFIG_WARP_REF_LIST
 #if CONFIG_EXTENDED_WARP_PREDICTION
-  int allowed_motion_modes = motion_mode_allowed(
-      cm, xd, mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]], mbmi);
-  if ((allowed_motion_modes & (1 << WARPED_CAUSAL))) {
-    // Collect projection samples used in least squares approximation of
-    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
-    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts0, pts_inref0);
-  }
+  mbmi->num_proj_ref = av1_findSamples(cm, xd, pts0, pts_inref0);
+  const RefCntBuffer *const refbuf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
+  int allowed_motion_modes =
+      motion_mode_allowed(cm, xd, mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]],
+                          mbmi, refbuf ? refbuf->base_qindex : -1);
   const int total_samples = mbmi->num_proj_ref;
   if (total_samples == 0) {
     // Do not search WARPED_CAUSAL if there are no samples to use to determine
@@ -1699,6 +1697,7 @@
     last_motion_mode_allowed = OBMC_CAUSAL;
   }
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+  mbmi->num_proj_ref_pruned = mbmi->num_proj_ref;
 
   const MB_MODE_INFO base_mbmi = *mbmi;
   MB_MODE_INFO best_mbmi;
@@ -1917,18 +1916,18 @@
         memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
         // Select the samples according to motion vector difference
         if (mbmi->num_proj_ref > 1) {
-          mbmi->num_proj_ref = av1_selectSamples(
+          mbmi->num_proj_ref_pruned = av1_selectSamples(
               &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
         }
 
         // Compute the warped motion parameters with a least squares fit
         //  using the collected samples
 #if CONFIG_EXTENDED_WARP_PREDICTION
-        if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
-                                 mbmi->mv[0].as_mv, &mbmi->wm_params[0], mi_row,
-                                 mi_col))
+        if (!av1_find_projection(mbmi->num_proj_ref_pruned, pts, pts_inref,
+                                 bsize, mbmi->mv[0].as_mv, &mbmi->wm_params[0],
+                                 mi_row, mi_col))
 #else
-      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
+      if (!av1_find_projection(mbmi->num_proj_ref_pruned, pts, pts_inref, bsize,
                                mbmi->mv[0].as_mv, &mbmi->wm_params, mi_row,
                                mi_col))
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
@@ -2025,11 +2024,13 @@
         // Compute the warped motion parameters with a least squares fit
         //  using the collected samples
 #if CONFIG_EXTENDED_WARP_PREDICTION
-        if (!av1_find_projection_interintra(xd, bsize, mbmi->mv[0].as_mv,
-                                            &mbmi->wm_params[0]))
+        if (!av1_find_projection_interintra(
+                xd, bsize, orig_dst->plane[0], orig_dst->stride[0],
+                mbmi->mv[0].as_mv, &mbmi->wm_params[0]))
 #else
-        if (!av1_find_projection_interintra(xd, bsize, mbmi->mv[0].as_mv,
-                                            &mbmi->wm_params))
+        if (!av1_find_projection_interintra(
+                xd, bsize, orig_dst->plane[0], orig_dst->stride[0],
+                mbmi->mv[0].as_mv, &mbmi->wm_params))
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
         {
           assert(!is_comp_pred);
@@ -2054,9 +2055,10 @@
                                               NULL);
 
             // Refine MV in a small range.
-            av1_refine_warped_interintra_mv(xd, cm, &ms_params, bsize,
-                                            cpi->sf.mv_sf.warp_search_method,
-                                            cpi->sf.mv_sf.warp_search_iters);
+            av1_refine_warped_interintra_mv(
+                xd, cm, orig_dst->plane[0], orig_dst->stride[0], &ms_params,
+                bsize, cpi->sf.mv_sf.warp_search_method,
+                cpi->sf.mv_sf.warp_search_iters);
 
             if (mv0.as_int != mbmi->mv[0].as_int) {
               // Keep the refined MV and WM parameters.
@@ -3106,6 +3108,7 @@
       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
 
   mbmi->num_proj_ref = 0;
+  mbmi->num_proj_ref_pruned = 0;
   mbmi->motion_mode = SIMPLE_TRANSLATION;
   mbmi->ref_mv_idx = ref_mv_idx;
   rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
@@ -3158,6 +3161,7 @@
 
   mbmi->motion_mode = SIMPLE_TRANSLATION;
   mbmi->num_proj_ref = 0;
+  mbmi->num_proj_ref_pruned = 0;
   if (is_comp_pred) {
     // Only compound_average
     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
@@ -4252,6 +4256,7 @@
       if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
 
       mbmi->num_proj_ref = 0;
+      mbmi->num_proj_ref_pruned = 0;
       mbmi->motion_mode = SIMPLE_TRANSLATION;
       mbmi->ref_mv_idx = ref_mv_idx;
       set_mv_precision(mbmi, mbmi->max_mv_precision);
@@ -4347,6 +4352,7 @@
         if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
 
         mbmi->num_proj_ref = 0;
+        mbmi->num_proj_ref_pruned = 0;
         mbmi->motion_mode = SIMPLE_TRANSLATION;
         mbmi->ref_mv_idx = ref_mv_idx;
         // Compute cost for signalling this DRL index
@@ -4597,6 +4603,7 @@
             mbmi->ref_frame[1] = NONE_FRAME;
 
           mbmi->num_proj_ref = 0;
+          mbmi->num_proj_ref_pruned = 0;
           mbmi->motion_mode = SIMPLE_TRANSLATION;
           mbmi->ref_mv_idx = ref_mv_idx;
 
@@ -4910,7 +4917,6 @@
 #if CONFIG_IMPROVED_JMVD
   }
 #endif  // CONFIG_IMPROVED_JMVD
-
   if (best_rd == INT64_MAX) return INT64_MAX;
 
   // re-instate status of the best choice
@@ -9047,8 +9053,8 @@
     mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
     // Select the samples according to motion vector difference
     if (mbmi->num_proj_ref > 1)
-      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
-                                             mbmi->num_proj_ref, bsize);
+      mbmi->num_proj_ref_pruned = av1_selectSamples(
+          &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
   }
 
 #if CONFIG_C071_SUBBLK_WARPMV