2D SR
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 212f1c4..4731792 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -525,20 +525,20 @@
   add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
                                                           const MV *const mv, uint16_t *comp_pred8, const uint16_t *pred8, int width,
                                                           int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref8, int ref_stride,
-							  int bd, int subpel_search";
+							  int bd, int subpel_search, int is_scaled_ref";
   specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
 
   add_proto qw/void aom_highbd_dist_wtd_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
                                                               const MV *const mv, uint16_t *comp_pred8, const uint16_t *pred8, int width,
                                                               int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref8,
-                                                              int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search";
+                                                              int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search, int is_scaled_ref";
   specialize qw/aom_highbd_dist_wtd_comp_avg_upsampled_pred sse2/;
 
   add_proto qw/void aom_highbd_comp_mask_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
                                                               const MV *const mv, uint16_t *comp_pred8, const uint16_t *pred8, int width,
                                                               int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref8,
                                                               int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
-                                                              int bd, int subpel_search";
+                                                              int bd, int subpel_search, int is_scaled_ref";
 
 
   #
diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index e6c6dbd..ce72e18 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c
@@ -520,6 +520,9 @@
           &inter_pred_params, width, height, mi_y >> pd->subsampling_y,
           mi_x >> pd->subsampling_x, pd->subsampling_x, pd->subsampling_y,
           xd->bd, is_intrabc, sf, pre_buf, filters);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+      av1_init_phase_offset(&inter_pred_params, cm);
+#endif
       av1_enc_build_one_inter_predictor(comp_pred, width, mv,
                                         &inter_pred_params);
       return;
@@ -567,12 +570,20 @@
     MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
     const MV *const mv, uint16_t *comp_pred, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+    int ref_stride, int bd, int subpel_search, int is_scaled_ref) {
+#else
     int ref_stride, int bd, int subpel_search) {
+#endif
   int i, j;
 
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
                             height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+                            bd, subpel_search, is_scaled_ref);
+#else
                             bd, subpel_search, 0);
+#endif
   for (i = 0; i < height; ++i) {
     for (j = 0; j < width; ++j) {
       comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + comp_pred[j], 1);
@@ -607,13 +618,21 @@
     const MV *const mv, uint16_t *comp_pred, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
     int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+    int subpel_search, int is_scaled_ref) {
+#else
     int subpel_search) {
+#endif
   int i, j;
   const int fwd_offset = jcp_param->fwd_offset;
   const int bck_offset = jcp_param->bck_offset;
   aom_highbd_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width,
                               height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+                              bd, subpel_search, is_scaled_ref);
+#else
                               bd, subpel_search, 0);
+#endif
 
   for (i = 0; i < height; i++) {
     for (j = 0; j < width; j++) {
@@ -650,13 +669,21 @@
     const MV *const mv, uint16_t *comp_pred, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
     int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+    int bd, int subpel_search, int is_scaled_ref) {
+#else
     int bd, int subpel_search) {
+#endif
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
                             height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+                            bd, subpel_search, is_scaled_ref);
+#else
                             bd, subpel_search, 0);
+#endif
   aom_highbd_comp_mask_pred(comp_pred, pred, width, height, comp_pred, width,
                             mask, mask_stride, invert_mask);
-}
+}
 
 #define HIGHBD_MASK_SUBPIX_VAR(W, H)                                         \
   unsigned int aom_highbd_8_masked_sub_pixel_variance##W##x##H##_c(          \
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index d288ef7..0842322 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c
@@ -649,6 +649,9 @@
           &inter_pred_params, width, height, mi_y >> pd->subsampling_y,
           mi_x >> pd->subsampling_x, pd->subsampling_x, pd->subsampling_y,
           xd->bd, is_intrabc, sf, pre_buf, filters);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+      av1_init_phase_offset(&inter_pred_params, cm);
+#endif
       av1_enc_build_one_inter_predictor(comp_pred, width, mv,
                                         &inter_pred_params);
       return;
@@ -722,10 +725,18 @@
     MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
     const MV *const mv, uint16_t *comp_pred16, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+    int ref_stride, int bd, int subpel_search, int is_scaled_ref) {
+#else
     int ref_stride, int bd, int subpel_search) {
+#endif
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred16, width,
                             height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+                            bd, subpel_search, is_scaled_ref);
+#else
                             bd, subpel_search, 0);
+#endif
   /*The total number of pixels must be a multiple of 8 (e.g., 4x4).*/
   assert(!(width * height & 7));
   int n = width * height >> 3;
@@ -806,12 +817,20 @@
     const MV *const mv, uint16_t *comp_pred16, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
     int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+    int subpel_search, int is_scaled_ref) {
+#else
     int subpel_search) {
+#endif
   int n;
   int i;
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred16, width,
                             height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+                            bd, subpel_search, is_scaled_ref);
+#else
                             bd, subpel_search, 0);
+#endif
   assert(!(width * height & 7));
   n = width * height >> 3;
 
diff --git a/aom_scale/generic/yv12extend.c b/aom_scale/generic/yv12extend.c
index e681ac7..cb49ddb 100644
--- a/aom_scale/generic/yv12extend.c
+++ b/aom_scale/generic/yv12extend.c
@@ -26,6 +26,25 @@
   int i;
   const int linesize = extend_left + extend_right + width;
 
+#if 0
+  {
+    printf("\textend_plane_high: %d\n", height);
+    if (height == 360) {
+      int dbg;
+      dbg = 1;
+
+      long long int sum = 0;
+      for (int r = 0; r < height; r++) {
+        for (int c = 0; c < width; c++) {
+          sum += src[r * src_stride + c];
+        }
+      }
+
+      printf("src: %lld ", sum);
+    }
+  }
+#endif
+
   /* copy the left and right most columns out */
   uint16_t *src_ptr1 = src;
   uint16_t *src_ptr2 = src + width - 1;
@@ -58,6 +77,25 @@
     memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t));
     dst_ptr2 += src_stride;
   }
+
+#if 0
+  {
+    if (height == 360) {
+      int dbg;
+      dbg = 1;
+
+      long long int sum = 0;
+      for (int r = -extend_top; r < (height + extend_bottom); r++) {
+        for (int c = -extend_left; c < (width + extend_right); c++) {
+          sum += src[r * src_stride + c];
+        }
+      }
+
+      printf("dst: %lld ", sum);
+      printf("\n");
+    }
+  }
+#endif
 }
 
 void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
diff --git a/aom_scale/yv12config.h b/aom_scale/yv12config.h
index 405efb1..0fb18d4 100644
--- a/aom_scale/yv12config.h
+++ b/aom_scale/yv12config.h
@@ -26,11 +26,23 @@
 
 /*!\cond */
 
+#if CONFIG_2D_SR_SCALE_EXT
+#define AOMINNERBORDERINPIXELS 480
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+#define AOM_INTERP_EXTEND 24
+#else
+#define AOM_INTERP_EXTEND 4
+#endif
+#define AOM_BORDER_IN_PIXELS 864
+#define AOM_ENC_NO_SCALE_BORDER 480
+#define AOM_DEC_BORDER_IN_PIXELS 192
+#else  // CONFIG_2D_SR_SCALE_EXT
 #define AOMINNERBORDERINPIXELS 160
 #define AOM_INTERP_EXTEND 4
 #define AOM_BORDER_IN_PIXELS 288
 #define AOM_ENC_NO_SCALE_BORDER 160
 #define AOM_DEC_BORDER_IN_PIXELS 64
+#endif  // CONFIG_2D_SR_SCALE_EXT
 
 /*!\endcond */
 /*!
diff --git a/apps/aomdec.c b/apps/aomdec.c
index f8de253..1710319 100644
--- a/apps/aomdec.c
+++ b/apps/aomdec.c
@@ -117,7 +117,7 @@
   &threadsarg,     &verbosearg, &scalearg,      &fb_arg,
   &md5arg,         &verifyarg,  &framestatsarg, &continuearg,
   &outbitdeptharg, &isannexb,   &oppointarg,    &outallarg,
-  &skipfilmgrain,  NULL
+  &skipfilmgrain,  NULL
 };
 
 #if CONFIG_LIBYUV
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 409084b..33aa2bc 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -127,6 +127,12 @@
        "${AOM_ROOT}/av1/common/tip.h")
 endif()
 
+if(CONFIG_2D_SR)
+  list(APPEND AOM_AV1_COMMON_SOURCES
+       "${AOM_ROOT}/av1/common/lanczos_resample.c"
+       "${AOM_ROOT}/av1/common/lanczos_resample.h")
+endif()
+
 list(
   APPEND
   AOM_AV1_DECODER_SOURCES
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index 987f0ed..9ea985a 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -731,10 +731,17 @@
   RANGE_CHECK(cfg, rc_resize_kf_denominator, SCALE_NUMERATOR,
               SCALE_NUMERATOR << 1);
   RANGE_CHECK_HI(cfg, rc_superres_mode, AOM_SUPERRES_AUTO);
+#if CONFIG_2D_SR_SCALE_EXT
+  RANGE_CHECK(cfg, rc_superres_denominator, SCALE_NUMERATOR,
+              SCALE_NUMERATOR * 6);
+  RANGE_CHECK(cfg, rc_superres_kf_denominator, SCALE_NUMERATOR,
+              SCALE_NUMERATOR * 6);
+#else  // CONFIG_2D_SR_SCALE_EXT
   RANGE_CHECK(cfg, rc_superres_denominator, SCALE_NUMERATOR,
               SCALE_NUMERATOR << 1);
   RANGE_CHECK(cfg, rc_superres_kf_denominator, SCALE_NUMERATOR,
               SCALE_NUMERATOR << 1);
+#endif  // CONFIG_2D_SR_SCALE_EXT
   RANGE_CHECK(cfg, rc_superres_qthresh, 1, 255);
   RANGE_CHECK(cfg, rc_superres_kf_qthresh, 1, 255);
   RANGE_CHECK_HI(extra_cfg, cdf_update_mode, 2);
@@ -1802,7 +1809,7 @@
         superres_cfg->superres_scale_denominator == SCALE_NUMERATOR &&
         superres_cfg->superres_kf_scale_denominator == SCALE_NUMERATOR) {
       disable_superres(superres_cfg);
-    }
+    }
     if (superres_cfg->superres_mode == AOM_SUPERRES_QTHRESH &&
         superres_cfg->superres_qthresh == 255 &&
         superres_cfg->superres_kf_qthresh == 255) {
@@ -1817,6 +1824,15 @@
     disable_superres(superres_cfg);
   }
 
+#if CONFIG_2D_SR_TILE_CONFIG
+  if (superres_cfg->enable_superres && (superres_cfg->superres_mode == AOM_SUPERRES_FIXED &&
+        (superres_cfg->superres_scale_denominator != SCALE_NUMERATOR ||
+        superres_cfg->superres_kf_scale_denominator != SCALE_NUMERATOR))){
+    tile_cfg->tile_columns = 0;
+    tile_cfg->tile_rows = 0;
+  }
+#endif  // CONFIG_2D_SR_TILE_CONFIG
+
   if (input_cfg->limit == 1) {
     // still picture mode, display model and timing is meaningless
     dec_model_cfg->display_model_info_present_flag = 0;
@@ -3137,6 +3153,7 @@
                                 src_time_stamp, src_end_time_stamp)) {
         res = update_error_state(ctx, &cpi->common.error);
       }
+
       aom_img_free(hbd_img);
       ctx->next_frame_flags = 0;
     }
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 65ff31d..f320a52 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -82,6 +82,21 @@
     const int tile_stripes = (ext_h + 63) / 64;
     num_stripes += tile_stripes;
   }
+#if CONFIG_2D_SR
+  // TODO(yuec, debargha): This is a temporary fix to handle mismatch between
+  // stripes in the coded domain vs. stripes in the upscaled domain. In AV1
+  // they were the same and there was no issue. However in
+  // CONFIG_2D_SR, the actual number of stripes in the upscaled domain
+  // can be twice as many as stripes in the coded domain. Hence this change.
+  // Going forward, wee need to rethink striped loop-restoration in the
+  // context of 2D superres, and implement a different strategy or remove
+  // striping altogether, based on consultation with hardware teams.
+#if CONFIG_2D_SR_SCALE_EXT
+  num_stripes *= 6;
+#else  // CONFIG_2D_SR_SCALE_EXT
+  num_stripes <<= 1;
+#endif  // CONFIG_2D_SR_SCALE_EXT
+#endif  // CONFIG_2D_SR
 
   // Now we need to allocate enough space to store the line buffers for the
   // stripes
@@ -314,13 +329,16 @@
 int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) {
   CommonModeInfoParams *const mi_params = &cm->mi_params;
   mi_params->set_mb_mi(mi_params, width, height);
+
 #if CONFIG_PC_WIENER
   if (alloc_mi(mi_params, cm)) goto fail;
 #else
   if (alloc_mi(mi_params)) goto fail;
 #endif  // CONFIG_PC_WIENER
+
   CommonSBInfoParams *const sbi_params = &cm->sbi_params;
   set_sb_si(cm);
+
   if (alloc_sbi(sbi_params)) goto fail;
 
   return 0;
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index b6d2063..ac9c755 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -994,6 +994,11 @@
   CctxType *cctx_type_map;
 #endif  // CONFIG_CROSS_CHROMA_TX
 
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  uint8_t superres_scale_denominator;
+  int frm_width, frm_height;
+#endif
+
   /**
    * \name Function pointers to allow separate logic for encoder and decoder.
    */
@@ -1324,6 +1329,14 @@
    * than the coded dimensions of the current frame.
    */
   RefCntBuffer *scaled_tip_frame;
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  /*!
+   * Buffer into which the upscaled interpolated tip frame will be stored and
+   * other related info. This is required for generating and upscaled version
+   * of the tip_ref frame for use in the case when super-res is used.
+   */
+  YV12_BUFFER_CONFIG upscaled_tip_frame_buf;
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
   /*!
    * Check a block is already interpolated
    */
@@ -1402,11 +1415,24 @@
   int superres_upscaled_height; /*!< Super-resolved frame height */
   /**@}*/
 
+#if CONFIG_2D_SR
+  /**
+   * \name Super-resolution scaling factor information.
+   * The index, numerator, and denominator of the superres scale used by this
+   * frame.
+   */
+  /**@{*/
+  uint8_t superres_scale_index;       /*!< Superres scaling index */
+  uint8_t superres_scale_numerator;   /*!< Superres scaling numerator */
+  uint8_t superres_scale_denominator; /*!< Superres scaling denominator */
+  /**@}*/
+#else   // CONFIG_2D_SR
   /*!
    * The denominator of the superres scale used by this frame.
    * Note: The numerator is fixed to be SCALE_NUMERATOR.
    */
   uint8_t superres_scale_denominator;
+#endif  // CONFIG_2D_SR
 
   /*!
    * If true, buffer removal times are present.
@@ -3641,12 +3667,14 @@
 
 #if CONFIG_TIP
   if (is_tip_ref_frame(mbmi->ref_frame[0])) {
+//    printf("Warp enabled for TIP? %d", (allowed_motion_modes & enabled_motion_modes));
     return (allowed_motion_modes & enabled_motion_modes);
   }
 #endif  // CONFIG_TIP
 
 #if CONFIG_ALLOW_SAME_REF_COMPOUND
   if (mbmi->ref_frame[0] == mbmi->ref_frame[1]) {
+//    printf("Warp enabled for same ref? %d", (allowed_motion_modes & enabled_motion_modes));
     return (allowed_motion_modes & enabled_motion_modes);
   }
 #endif  // CONFIG_ALLOW_SAME_REF_COMPOUND
@@ -3655,6 +3683,7 @@
     const TransformationType gm_type =
         cm->global_motion[mbmi->ref_frame[0]].wmtype;
     if (is_global_mv_block(mbmi, gm_type)) {
+//      printf("Warp enabled for globa? %d", (allowed_motion_modes & enabled_motion_modes));
       return (allowed_motion_modes & enabled_motion_modes);
     }
   }
@@ -3673,7 +3702,6 @@
   // From here on, all modes are warped, so have some common criteria:
   const int allow_warped_motion =
       motion_variation_allowed &&
-      !av1_is_scaled(xd->block_ref_scale_factors[0]) &&
       !xd->cur_frame_force_integer_mv;
 
   if (obmc_allowed && allow_warped_motion && mbmi->num_proj_ref >= 1
@@ -3717,6 +3745,7 @@
   if (warp_delta_allowed) {
     allowed_motion_modes |= (1 << WARP_DELTA);
   }
+// printf("Warp enabled? %d", (allowed_motion_modes & enabled_motion_modes));
 
   return (allowed_motion_modes & enabled_motion_modes);
 }
@@ -3753,8 +3782,12 @@
     assert(!has_second_ref(mbmi));
     const int allow_warped_motion = cm->features.allow_warped_motion;
     if (mbmi->num_proj_ref >= 1 &&
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+        (allow_warped_motion)) {
+#else
         (allow_warped_motion &&
          !av1_is_scaled(xd->block_ref_scale_factors[0]))) {
+#endif
       if (xd->cur_frame_force_integer_mv) {
         return OBMC_CAUSAL;
       }
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index e0efb4e..4319860 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -457,7 +457,7 @@
 
 # WARPED_MOTION / GLOBAL_MOTION functions
 
-add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
+add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta, const int x_step_qn, const int y_step_qn";
 specialize qw/av1_highbd_warp_affine sse4_1 avx2/;
 
 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
@@ -485,6 +485,7 @@
 add_proto qw/void av1_highbd_dist_wtd_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn, ConvolveParams *conv_params, int bd";
 add_proto qw/void av1_highbd_dist_wtd_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, ConvolveParams *conv_params, int bd";
 add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_qn, const int y_step_qn, ConvolveParams *conv_params, int bd";
+add_proto qw/void av1_highbd_convolve_2d_scale_strided/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_qn, const int y_step_qn, ConvolveParams *conv_params, int bd";
 
 specialize qw/av1_highbd_dist_wtd_convolve_2d sse4_1 avx2/;
 specialize qw/av1_highbd_dist_wtd_convolve_x sse4_1 avx2/;
@@ -494,6 +495,7 @@
 specialize qw/av1_highbd_convolve_x_sr ssse3 avx2/;
 specialize qw/av1_highbd_convolve_y_sr ssse3 avx2/;
 specialize qw/av1_highbd_convolve_2d_scale sse4_1/;
+specialize qw/av1_highbd_convolve_2d_scale_strided sse4_1/;
 
 # INTRA_EDGE functions
 add_proto qw/void av1_filter_intra_edge_high/, "uint16_t *p, int sz, int strength";
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
index 26c05fd..b0215e3 100644
--- a/av1/common/blockd.c
+++ b/av1/common/blockd.c
@@ -462,8 +462,15 @@
 void av1_alloc_txk_skip_array(CommonModeInfoParams *mi_params, AV1_COMMON *cm) {
   // Allocate based on the MIN_TX_SIZE, which is a 4x4 block.
   for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
-    int w = mi_params->mi_cols << MI_SIZE_LOG2;
-    int h = mi_params->mi_rows << MI_SIZE_LOG2;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    int w = AOMMAX(mi_params->frm_width, cm->seq_params.max_frame_width);
+    int h = AOMMAX(mi_params->frm_height, cm->seq_params.max_frame_height);
+    assert((mi_params->frm_width == 0 && cm->seq_params.max_frame_width > 0) || (mi_params->frm_width > 0 && cm->seq_params.max_frame_width == 0) || (mi_params->frm_width == cm->seq_params.max_frame_width));
+    assert((mi_params->frm_height == 0 && cm->seq_params.max_frame_height > 0) || (mi_params->frm_height > 0 && cm->seq_params.max_frame_height == 0) || (mi_params->frm_height == cm->seq_params.max_frame_height));
+#else
+    int w = mi_params->mi_cols << MI_SIZE_LOG2;
+    int h = mi_params->mi_rows << MI_SIZE_LOG2;
+#endif
     w = ((w + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
     h = ((h + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
     w >>= ((plane == 0) ? 0 : cm->seq_params.subsampling_x);
@@ -477,6 +484,20 @@
 #ifndef NDEBUG
   av1_reset_txk_skip_array(cm);
 #endif  // NDEBUG
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
+    int w = mi_params->mi_cols << MI_SIZE_LOG2;
+    int h = mi_params->mi_rows << MI_SIZE_LOG2;
+    w = ((w + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
+    h = ((h + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
+    w >>= ((plane == 0) ? 0 : cm->seq_params.subsampling_x);
+    h >>= ((plane == 0) ? 0 : cm->seq_params.subsampling_y);
+    int stride = (w + MIN_TX_SIZE - 1) >> MIN_TX_SIZE_LOG2;
+    int rows = (h + MIN_TX_SIZE - 1) >> MIN_TX_SIZE_LOG2;
+    mi_params->tx_skip_buf_size[plane] = rows * stride;
+    mi_params->tx_skip_stride[plane] = stride;
+  }
+#endif
 }
 
 void av1_dealloc_txk_skip_array(CommonModeInfoParams *mi_params) {
@@ -487,6 +508,10 @@
 }
 
 void av1_reset_txk_skip_array(AV1_COMMON *cm) {
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  if(cm->superres_scale_denominator != SCALE_NUMERATOR)
+    return;
+#endif
   // Allocate based on the MIN_TX_SIZE, which is a 4x4 block.
   for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
     int w = cm->mi_params.mi_cols << MI_SIZE_LOG2;
@@ -501,7 +526,12 @@
   }
 }
 
+
 void av1_reset_txk_skip_array_using_mi_params(CommonModeInfoParams *mi_params) {
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  if(mi_params->superres_scale_denominator != SCALE_NUMERATOR)
+    return;
+#endif
   for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
     memset(mi_params->tx_skip[plane], ILLEGAL_TXK_SKIP_VALUE,
            mi_params->tx_skip_buf_size[plane]);
@@ -513,6 +543,10 @@
                              TREE_TYPE tree_type,
                              const CHROMA_REF_INFO *chroma_ref_info,
                              int plane_start, int plane_end) {
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  if(cm->superres_scale_denominator != SCALE_NUMERATOR)
+    return;
+#endif
   const bool is_chroma_ref = chroma_ref_info->is_chroma_ref;
   for (int plane = plane_start; plane < plane_end; plane++) {
     if (plane && !is_chroma_ref) {
@@ -562,6 +596,10 @@
                                const CHROMA_REF_INFO *chroma_ref_info,
                                int plane, int blk_row, int blk_col,
                                TX_SIZE tx_size) {
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  if(cm->superres_scale_denominator != SCALE_NUMERATOR)
+    return;
+#endif
   blk_row *= 4;
   blk_col *= 4;
   mi_row = (tree_type == SHARED_PART && plane)
@@ -595,6 +633,10 @@
 
 uint8_t av1_get_txk_skip(const AV1_COMMON *cm, int mi_row, int mi_col,
                          int plane, int blk_row, int blk_col) {
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  if(cm->superres_scale_denominator != SCALE_NUMERATOR)
+    return 0;
+#endif
   blk_row *= 4;
   blk_col *= 4;
   int w = ((cm->width + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2)
@@ -615,8 +657,15 @@
 
 void av1_alloc_class_id_array(CommonModeInfoParams *mi_params, AV1_COMMON *cm) {
   for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
-    int w = mi_params->mi_cols << MI_SIZE_LOG2;
-    int h = mi_params->mi_rows << MI_SIZE_LOG2;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    int w = AOMMAX(mi_params->frm_width, cm->seq_params.max_frame_width);
+    int h = AOMMAX(mi_params->frm_height, cm->seq_params.max_frame_height);
+    assert((mi_params->frm_width == 0 && cm->seq_params.max_frame_width > 0) || (mi_params->frm_width > 0 && cm->seq_params.max_frame_width == 0) || (mi_params->frm_width == cm->seq_params.max_frame_width));
+    assert((mi_params->frm_height == 0 && cm->seq_params.max_frame_height > 0) || (mi_params->frm_height > 0 && cm->seq_params.max_frame_height == 0) || (mi_params->frm_height == cm->seq_params.max_frame_height));
+#else
+    int w = mi_params->mi_cols << MI_SIZE_LOG2;
+    int h = mi_params->mi_rows << MI_SIZE_LOG2;
+#endif
     w = ((w + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
     h = ((h + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
     w >>= ((plane == 0) ? 0 : cm->seq_params.subsampling_x);
@@ -627,6 +676,19 @@
         aom_calloc(rows * stride, sizeof(uint8_t));
     mi_params->wiener_class_id_stride[plane] = stride;
   }
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
+    int w = mi_params->mi_cols << MI_SIZE_LOG2;
+    int h = mi_params->mi_rows << MI_SIZE_LOG2;
+    w = ((w + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
+    h = ((h + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
+    w >>= ((plane == 0) ? 0 : cm->seq_params.subsampling_x);
+    h >>= ((plane == 0) ? 0 : cm->seq_params.subsampling_y);
+    int stride = (w + MIN_TX_SIZE - 1) >> MIN_TX_SIZE_LOG2;
+    int rows = (h + MIN_TX_SIZE - 1) >> MIN_TX_SIZE_LOG2;  // NOTE(review): rows is unused here — confirm intent
+    mi_params->wiener_class_id_stride[plane] = stride;
+  }
+#endif
 }
 
 void av1_dealloc_class_id_array(CommonModeInfoParams *mi_params) {
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index 98ed1c5..bcb32b2 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -381,10 +381,17 @@
 #endif  // CONFIG_D071_IMP_MSK_BLD
 
 #if CONFIG_REFINEMV
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+#define REF_BUFFER_WIDTH \
+  (6 * REFINEMV_SUBBLOCK_WIDTH + (AOM_INTERP_EXTEND - 1) + AOM_INTERP_EXTEND)
+#define REF_BUFFER_HEIGHT \
+  (6 * REFINEMV_SUBBLOCK_HEIGHT + (AOM_INTERP_EXTEND - 1) + AOM_INTERP_EXTEND)
+#else
 #define REF_BUFFER_WIDTH \
   (REFINEMV_SUBBLOCK_WIDTH + (AOM_INTERP_EXTEND - 1) + AOM_INTERP_EXTEND)
 #define REF_BUFFER_HEIGHT \
   (REFINEMV_SUBBLOCK_HEIGHT + (AOM_INTERP_EXTEND - 1) + AOM_INTERP_EXTEND)
+#endif
 typedef struct PadBlock {
   int x0;
   int x1;
diff --git a/av1/common/convolve.c b/av1/common/convolve.c
index 9846735..a8f3393 100644
--- a/av1/common/convolve.c
+++ b/av1/common/convolve.c
@@ -402,9 +402,12 @@
                                     const int subpel_x_qn, const int x_step_qn,
                                     const int subpel_y_qn, const int y_step_qn,
                                     ConvolveParams *conv_params, int bd) {
+
   int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+
   int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
              filter_params_y->taps;
+
   int im_stride = w;
   const int fo_vert = filter_params_y->taps / 2 - 1;
   const int fo_horiz = filter_params_x->taps / 2 - 1;
@@ -432,6 +435,7 @@
       im_block[y * im_stride + x] =
           (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
     }
+
     src_horiz += src_stride;
   }
 
@@ -482,6 +486,145 @@
   }
 }
 
+
+void av1_highbd_convolve_2d_scale_strided_c(const uint16_t *src, int src_stride,
+                                    uint16_t *dst, int dst_stride, int w, int h,
+                                    const InterpFilterParams *filter_params_x,
+                                    const InterpFilterParams *filter_params_y,
+                                    const int subpel_x_qn, const int x_step_qn,
+#if CONFIG_2D_SR_STRIDED_CONV_SPEED
+                                    const int subpel_y_qn, int y_step_qn,
+#else
+                                    const int subpel_y_qn, const int y_step_qn,
+#endif
+                                    ConvolveParams *conv_params, int bd) {
+									
+#if CONFIG_2D_SR_SCALE_EXT && !CONFIG_2D_SR_STRIDED_CONV_SPEED
+//  int16_t im_block[(6 * MAX_SB_SIZE + MAX_FILTER_TAP) * (6 * MAX_SB_SIZE)];
+	int16_t *im_block = (int16_t *)aom_memalign(2, (6 * MAX_SB_SIZE + MAX_FILTER_TAP) * (6 * MAX_SB_SIZE) * sizeof(int16_t));  
+#else  // CONFIG_2D_SR_SCALE_EXT
+  int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
+#endif  // CONFIG_2D_SR_SCALE_EXT
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT && !CONFIG_2D_SR_STRIDED_CONV_SPEED
+  int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+                   (filter_params_y->taps * conv_params->stride_scale);
+#else
+  int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+             filter_params_y->taps;
+#endif
+
+#if CONFIG_2D_SR_STRIDED_CONV_SPEED
+#if !CONFIG_2D_SR_PHASE_ADJUSTMENT
+  assert(0);
+#endif
+  int y_stride = y_step_qn >> SCALE_SUBPEL_BITS;
+  y_step_qn = y_step_qn / y_stride;
+  im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+         filter_params_y->taps;
+  assert(y_step_qn == ( 1 << SCALE_SUBPEL_BITS));
+  assert(y_stride==1 || y_stride==2);
+#endif
+
+  int im_stride = w;
+  const int fo_vert = filter_params_y->taps / 2 - 1;
+  const int fo_horiz = filter_params_x->taps / 2 - 1;
+  CONV_BUF_TYPE *dst16 = conv_params->dst;
+  const int dst16_stride = conv_params->dst_stride;
+  const int bits =
+      FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+  const int use_wtd_comp_avg = is_uneven_wtd_comp_avg(conv_params);
+  assert(bits >= 0);
+  // horizontal filter
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+  const uint16_t *src_horiz = src - (fo_vert * conv_params->stride_scale) * src_stride;
+#else
+  const uint16_t *src_horiz = src - fo_vert * src_stride;
+#endif
+  for (int y = 0; y < im_h; ++y) {
+    int x_qn = subpel_x_qn;
+    for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
+      const uint16_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
+      const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+      assert(x_filter_idx < SUBPEL_SHIFTS);
+      const int16_t *x_filter =
+          av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx);
+      int32_t sum = (1 << (bd + FILTER_BITS - 1));
+      for (int k = 0; k < filter_params_x->taps; ++k) {
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+        sum += x_filter[k] * src_x[(k - fo_horiz) * conv_params->stride_scale];
+#else
+        sum += x_filter[k] * src_x[k - fo_horiz];
+#endif
+      }
+      assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
+      im_block[y * im_stride + x] =
+          (int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
+    }
+#if CONFIG_2D_SR_STRIDED_CONV_SPEED
+    src_horiz += y_stride * src_stride;
+#else
+    src_horiz += src_stride;
+#endif
+  }
+
+  // vertical filter
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT && !CONFIG_2D_SR_STRIDED_CONV_SPEED
+  int16_t *src_vert = im_block + (fo_vert * conv_params->stride_scale) * im_stride;
+#else
+  int16_t *src_vert = im_block + fo_vert * im_stride;
+#endif
+  const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+  for (int x = 0; x < w; ++x) {
+    int y_qn = subpel_y_qn;
+    for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
+      const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
+      const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+      assert(y_filter_idx < SUBPEL_SHIFTS);
+      const int16_t *y_filter =
+          av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx);
+      int32_t sum = 1 << offset_bits;
+      for (int k = 0; k < filter_params_y->taps; ++k) {
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT  && !CONFIG_2D_SR_STRIDED_CONV_SPEED
+        sum += y_filter[k] * src_y[((k - fo_vert) * conv_params->stride_scale) * im_stride];
+#else
+        sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
+#endif
+      }
+      assert(0 <= sum && sum < (1 << (offset_bits + 2)));
+      CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
+      if (conv_params->is_compound) {
+        if (conv_params->do_average) {
+          int32_t tmp = dst16[y * dst16_stride + x];
+          if (use_wtd_comp_avg) {
+            tmp = tmp * conv_params->fwd_offset + res * conv_params->bck_offset;
+            tmp = tmp >> DIST_PRECISION_BITS;
+          } else {
+            tmp += res;
+            tmp = tmp >> 1;
+          }
+          /* Subtract round offset and convolve round */
+          tmp = tmp - ((1 << (offset_bits - conv_params->round_1)) +
+                       (1 << (offset_bits - conv_params->round_1 - 1)));
+          dst[y * dst_stride + x] =
+              clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
+        } else {
+          dst16[y * dst16_stride + x] = res;
+        }
+      } else {
+        /* Subtract round offset and convolve round */
+        int32_t tmp = res - ((1 << (offset_bits - conv_params->round_1)) +
+                             (1 << (offset_bits - conv_params->round_1 - 1)));
+        dst[y * dst_stride + x] =
+            clip_pixel_highbd(ROUND_POWER_OF_TWO(tmp, bits), bd);
+      }
+    }
+    src_vert++;
+  }
+#if CONFIG_2D_SR_SCALE_EXT && !CONFIG_2D_SR_STRIDED_CONV_SPEED
+  aom_free(im_block);
+#endif
+}
+
 static void highbd_convolve_2d_facade_compound(
     const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
     const int w, const int h, const InterpFilterParams *filter_params_x,
@@ -489,6 +632,7 @@
     const int subpel_y_qn, ConvolveParams *conv_params, int bd) {
   const bool need_x = subpel_x_qn != 0;
   const bool need_y = subpel_y_qn != 0;
+
   if (!need_x && !need_y) {
     av1_highbd_dist_wtd_convolve_2d_copy(src, src_stride, dst, dst_stride, w, h,
                                          conv_params, bd);
@@ -515,6 +659,7 @@
     const int subpel_y_qn, ConvolveParams *conv_params, int bd) {
   const bool need_x = subpel_x_qn != 0;
   const bool need_y = subpel_y_qn != 0;
+
   // Filters with taps > 8 are only for encoder side use.
   const int filter_x_taps_gt8 =
       (filter_params_x == NULL) ? 0 : ((filter_params_x->taps > 8) ? 1 : 0);
@@ -568,6 +713,7 @@
 
   const int need_filter_params_x = (subpel_x_qn != 0) | scaled;
   const int need_filter_params_y = (subpel_y_qn != 0) | scaled;
+
   const InterpFilterParams *filter_params_x =
       need_filter_params_x ? interp_filters[0] : NULL;
   const InterpFilterParams *filter_params_y =
@@ -577,10 +723,25 @@
     if (conv_params->is_compound) {
       assert(conv_params->dst != NULL);
     }
+
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if (conv_params->stride_scale == 1) {
+      av1_highbd_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h,
+                                   filter_params_x, filter_params_y,
+                                   subpel_x_qn, x_step_q4, subpel_y_qn,
+                                   y_step_q4, conv_params, bd);
+    } else {
+      av1_highbd_convolve_2d_scale_strided(src, src_stride, dst, dst_stride, w, h,
+                                   filter_params_x, filter_params_y,
+                                   subpel_x_qn, x_step_q4, subpel_y_qn,
+                                   y_step_q4, conv_params, bd);    
+    }
+#else
     av1_highbd_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h,
-                                 filter_params_x, filter_params_y, subpel_x_qn,
-                                 x_step_q4, subpel_y_qn, y_step_q4, conv_params,
-                                 bd);
+                                 filter_params_x, filter_params_y,
+                                 subpel_x_qn, x_step_q4, subpel_y_qn,
+                                 y_step_q4, conv_params, bd);
+#endif
   } else if (conv_params->is_compound) {
     highbd_convolve_2d_facade_compound(
         src, src_stride, dst, dst_stride, w, h, filter_params_x,
@@ -1091,6 +1252,9 @@
   int col_base = col + col_offset + tskip_lead;
   assert(col_base >= 0);
   // For width equals to zero case.
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  assert(tskip_sum_buf[col_base] >= 0);
+#endif  
   tskip_feature_accum[0] += tskip_sum_buf[col_base];
 
   // For the remaining width.
@@ -1108,6 +1272,8 @@
     const int cur_idx = (col + PC_WIENER_BLOCK_SIZE - 1) / PC_WIENER_BLOCK_SIZE;
     const int prev_idx =
         (col + PC_WIENER_BLOCK_SIZE - 2) / PC_WIENER_BLOCK_SIZE;
+    assert(tskip_sum_buf[col_base] >= 0);   
+    assert(tskip_sum_buf[cl] >= 0);   
     const int cur_diff = tskip_sum_buf[col_base] - tskip_sum_buf[cl];
     tskip_feature_accum[cur_idx] = tskip_feature_accum[prev_idx] + cur_diff;
   }
diff --git a/av1/common/convolve.h b/av1/common/convolve.h
index 89f5c8e..6e06169 100644
--- a/av1/common/convolve.h
+++ b/av1/common/convolve.h
@@ -29,6 +29,9 @@
   int is_compound;
   int fwd_offset;
   int bck_offset;
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+  int stride_scale;
+#endif
 } ConvolveParams;
 
 #if CONFIG_PC_WIENER || CONFIG_WIENER_NONSEP
diff --git a/av1/common/enums.h b/av1/common/enums.h
index e412cf9..5f1fcb5 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -96,6 +96,9 @@
 #endif
 #endif
 #if CONFIG_ADAPTIVE_MVD
+#define CONFIG_ADAPTIVE_MVD_TEST1 0
+#define CONFIG_ADAPTIVE_MVD_TEST2 0
+#define CONFIG_ADAPTIVE_MVD_TEST3 0
 #define IMPROVED_AMVD 1
 #else
 #define IMPROVED_AMVD 0
@@ -1072,7 +1075,12 @@
 // REF_FRAMES for the cm->ref_frame_map array, 1 scratch frame for the new
 // frame in cm->cur_frame, INTER_REFS_PER_FRAME for scaled references on the
 // encoder in the cpi->scaled_ref_buf array.
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+// TODO: Change to accommodate downsampled references at encoder-side. A separate variable needs to be created for decoder.
+#define FRAME_BUFFERS (REF_FRAMES + 1 + INTER_REFS_PER_FRAME * (SUPERRES_SCALES + 1))
+#else
 #define FRAME_BUFFERS (REF_FRAMES + 1 + INTER_REFS_PER_FRAME)
+#endif
 
 #define FWD_RF_OFFSET(ref) (ref - LAST_FRAME)
 #define BWD_RF_OFFSET(ref) (ref - BWDREF_FRAME)
@@ -1141,8 +1149,18 @@
   SCALABILITY_SS = 14
 } UENUM1BYTE(SCALABILITY_STRUCTURES);
 
+#if CONFIG_2D_SR
+#if CONFIG_2D_SR_SCALE_EXT
+#define SUPERRES_SCALE_BITS 3
+#define SUPERRES_SCALES (1 << SUPERRES_SCALE_BITS)
+#else  // CONFIG_2D_SR_SCALE_EXT
+#define SUPERRES_SCALE_BITS 2
+#define SUPERRES_SCALES (1 << SUPERRES_SCALE_BITS)
+#endif  // CONFIG_2D_SR_SCALE_EXT
+#else  // CONFIG_2D_SR
 #define SUPERRES_SCALE_BITS 3
 #define SUPERRES_SCALE_DENOMINATOR_MIN (SCALE_NUMERATOR + 1)
+#endif  // CONFIG_2D_SR
 
 // In large_scale_tile coding, external references are used.
 #define MAX_EXTERNAL_REFERENCES 128
diff --git a/av1/common/lanczos_resample.c b/av1/common/lanczos_resample.c
new file mode 100644
index 0000000..a50d272
--- /dev/null
+++ b/av1/common/lanczos_resample.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include "tools/lanczos/lanczos_resample.h"
+#include "config/aom_config.h"
+
+/* Shift down with rounding for use when n >= 0, value >= 0 */
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (((1 << (n)) >> 1))) >> (n))
+
+/* Shift down with rounding for signed integers, for use when n >= 0 */
+#define ROUND_POWER_OF_TWO_SIGNED(value, n)           \
+  (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
+                 : ROUND_POWER_OF_TWO((value), (n)))
+
+#define MAX(a, b) ((a) < (b) ? (b) : (a))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+#ifndef M_PI
+#define M_PI (3.14159265358979323846)
+#endif
+
+double get_centered_x0(int p, int q) { return (double)(q - p) / (2 * p); }
+
+double get_cosited_chroma_x0(int p, int q) { return (double)(q - p) / (4 * p); }
+
+double get_inverse_x0_numeric(int p, int q, double x0) { return -x0 * p / q; }
+
+double get_inverse_x0(int p, int q, double x0, int subsampled) {
+  if (x0 == (double)('c'))
+    x0 = get_centered_x0(p, q);
+  else if (x0 == (double)('d'))
+    x0 = subsampled ? get_cosited_chroma_x0(p, q) : get_centered_x0(p, q);
+#if CONFIG_2D_SR_ZERO_PHASE
+  else if (x0 == (double)('z'))
+    x0 = 0;
+#endif
+  return get_inverse_x0_numeric(p, q, x0);
+}
+
+static inline int doclip(int x, int low, int high) {
+  return (x < low ? low : x > high ? high : x);
+}
+
+void show_resample_filter(RationalResampleFilter *rf) {
+  printf("Resample factor: %d / %d\n", rf->p, rf->q);
+  printf("Extension type: %s\n", ext2str(rf->ext_type));
+  printf("Start = %d\n", rf->start);
+  printf("Steps = ");
+  for (int i = 0; i < rf->p; ++i) {
+    printf("%d, ", rf->steps[i]);
+  }
+  printf("\n");
+  printf("Phases = ");
+  for (int i = 0; i < rf->p; ++i) {
+    printf("%f, ", rf->phases[i]);
+  }
+  printf("\n");
+  printf("Filters [length %d, bits %d]:\n", rf->length, rf->filter_bits);
+  for (int i = 0; i < rf->p; ++i) {
+    printf("  { ");
+    for (int j = 0; j < rf->length; ++j) printf("%d, ", rf->filter[i][j]);
+    printf("  }\n");
+  }
+  printf("\n");
+}
+
+static double sinc(double x) {
+  if (fabs(x) < 1e-12) return 1.0;
+  return sin(M_PI * x) / (M_PI * x);
+}
+
+static double mod_bessel_first(double x) {
+  const double t = 0.25 * x * x;
+  double fact = 1.0;
+  double tpow = 1.0;
+  double v = 1.0;
+  double dv;
+  int k = 1;
+  do {
+    fact *= k;
+    tpow *= t;
+    dv = tpow / (fact * fact);
+    v += dv;
+    k++;
+  } while (fabs(dv) > fabs(v) * 1e-8);
+  return v;
+}
+
+// This is a window function assumed to be defined between [-1, 1] and
+// with the value at y=0 being 1.
+static double window(double y, WIN_TYPE win) {
+  switch (win) {
+    case WIN_LANCZOS: {
+      return sinc(y);
+    }
+    case WIN_LANCZOS_DIL: {
+      return sinc(y * 0.95);
+    }
+    case WIN_GAUSSIAN: {
+      const double sigma = 0.66;
+      const double sigma2 = sigma * sigma;
+      return exp(-y * y / sigma2);
+    }
+    case WIN_GENGAUSSIAN: {
+      const double alpha = 4;
+      const double sigma = 0.78;
+      return exp(-pow(fabs(y / sigma), alpha));
+    }
+    case WIN_COSINE: {
+      return cos(M_PI * y / 2);
+    }
+    case WIN_HAMMING: {
+      const double a0 = 25.0 / 46.0;
+      const double a1 = 1.0 - a0;
+      return (a0 + a1 * cos(M_PI * y));
+    }
+    case WIN_BLACKMAN: {
+      const double a0 = 0.42659;
+      const double a1 = 0.49656;
+      const double a2 = 1.0 - a0 - a1;
+      return a0 + a1 * cos(M_PI * y) + a2 * cos(2 * M_PI * y);
+    }
+    case WIN_KAISER: {
+      const double alpha = 1.32;
+      const double u = M_PI * alpha;
+      const double v = M_PI * alpha * sqrt(1 - y * y);
+      return mod_bessel_first(v) / mod_bessel_first(u);
+    }
+    default: {
+      assert(0 && "Unknown window type");
+      return 0;
+    }
+  }
+}
+
+static double kernel(double x, int a, WIN_TYPE win_type) {
+  const double absx = fabs(x);
+  if (absx < (double)a) {
+    return sinc(x) * window(x / a, win_type);
+  } else {
+    return 0.0;
+  }
+}
+
+static int get_lanczos_downsampler_filter_length(int p, int q, int a) {
+  assert(p < q);
+  return 2 * ((a * q + p - 1) / p);
+}
+
+static int get_lanczos_upsampler_filter_length(int p, int q, int a) {
+  (void)p;
+  (void)q;
+  assert(p >= q);
+  return 2 * a;
+}
+
+static void integerize_array(double *x, int len, int bits, int16_t *y) {
+  int sumy = 0;
+  for (int i = 0; i < len; ++i) {
+    y[i] = (int16_t)rint(x[i] * (1 << bits));
+    sumy += y[i];
+  }
+  while (sumy > (1 << bits)) {
+    double mx = -65536.0;
+    int imx = -1;
+    for (int i = 0; i < len; ++i) {
+      const double v = (double)y[i] - (x[i] * (1 << bits));
+      if (v > mx) {
+        mx = v;
+        imx = i;
+      }
+    }
+    y[imx] -= 1;
+    sumy -= 1;
+  }
+  while (sumy < (1 << bits)) {
+    double mx = 65536.0;
+    int imx = -1;
+    for (int i = 0; i < len; ++i) {
+      const double v = (double)y[i] - (x[i] * (1 << bits));
+      if (v < mx) {
+        mx = v;
+        imx = i;
+      }
+    }
+    y[imx] += 1;
+    sumy += 1;
+  }
+  sumy = 0;
+  for (int i = 0; i < len; ++i) {
+    sumy += y[i];
+  }
+  assert(sumy == (1 << bits));
+}
+
+static void get_lanczos_downsampler(double x, int p, int q, int a, int bits,
+                                    WIN_TYPE win_type, int16_t *ifilter) {
+  double filter[MAX_FILTER_LEN] = { 0.0 };
+  int tapsby2 = get_lanczos_downsampler_filter_length(p, q, a) / 2;
+  assert(tapsby2 * 2 <= MAX_FILTER_LEN);
+  double filter_sum = 0;
+  for (int i = -tapsby2 + 1; i <= tapsby2; ++i) {
+    const double tap = kernel((i - x) * p / q, a, win_type);
+    filter[i + tapsby2 - 1] = tap;
+    filter_sum += tap;
+  }
+  assert(filter_sum != 0.0);
+  for (int i = -tapsby2 + 1; i <= tapsby2; ++i) {
+    filter[i + tapsby2 - 1] /= filter_sum;
+  }
+  integerize_array(filter, 2 * tapsby2, bits, ifilter);
+}
+
+static void get_lanczos_upsampler(double x, int p, int q, int a, int bits,
+                                  WIN_TYPE win_type, int16_t *ifilter) {
+  double filter[MAX_FILTER_LEN] = { 0.0 };
+  int tapsby2 = get_lanczos_upsampler_filter_length(p, q, a) / 2;
+  assert(tapsby2 * 2 <= MAX_FILTER_LEN);
+  double filter_sum = 0;
+  for (int i = -tapsby2 + 1; i <= tapsby2; ++i) {
+    const double tap = kernel(i - x, a, win_type);
+    filter[i + tapsby2 - 1] = tap;
+    filter_sum += tap;
+  }
+  assert(filter_sum != 0.0);
+  for (int i = -tapsby2 + 1; i <= tapsby2; ++i) {
+    filter[i + tapsby2 - 1] /= filter_sum;
+  }
+  integerize_array(filter, 2 * tapsby2, bits, ifilter);
+}
+
+static int gcd(int p, int q) {
+  int p1 = (p < q ? p : q);
+  int q1 = (p1 == p ? q : p);
+  while (p1) {
+    const int t = p1;
+    p1 = q1 % p1;
+    q1 = t;
+  }
+  return q1;
+}
+
+const char *ext_names[] = { "Repeat", "Symmetric", "Reflect", "Gradient" };
+const char *ext2str(EXT_TYPE ext_type) { return ext_names[(int)ext_type]; }
+
+int get_resample_filter(int p, int q, int a, double x0, EXT_TYPE ext_type,
+                        WIN_TYPE win_type, int subsampled, int bits,
+                        RationalResampleFilter *rf) {
+  double offset[MAX_RATIONAL_FACTOR + 1];
+  int intpel[MAX_RATIONAL_FACTOR + 1];  // intpel[rf->p] is written below
+  if (p <= 0 || q <= 0) {
+    fprintf(stderr, "Resampling numerator or denominator must be positive\n");
+    return 0;
+  }
+  const int g = gcd(p, q);
+  assert(g > 0);
+  rf->p = p / g;
+  rf->q = q / g;
+  if (rf->p <= 0 || rf->p > MAX_RATIONAL_FACTOR) {
+    fprintf(stderr, "Resampling numerator %d ratio exceeds maximum allowed\n",
+            rf->p);
+    return 0;
+  }
+  if (rf->q <= 0 || rf->q > MAX_RATIONAL_FACTOR) {
+    fprintf(stderr, "Resampling denominator %d ratio exceeds maximum allowed\n",
+            rf->q);
+    return 0;
+  }
+  rf->ext_type = ext_type;
+  rf->win_type = win_type;
+  if (x0 == (double)('c'))
+    x0 = get_centered_x0(rf->p, rf->q);
+  else if (x0 == (double)('d'))
+    x0 = subsampled ? get_cosited_chroma_x0(rf->p, rf->q)
+                    : get_centered_x0(rf->p, rf->q);
+#if CONFIG_2D_SR_ZERO_PHASE
+  else if (x0 == (double)('z'))
+    x0 = 0;
+#endif
+  rf->filter_bits = bits;
+  for (int i = 0; i < rf->p; ++i) {
+    offset[i] = (double)rf->q / (double)rf->p * i + x0;
+    intpel[i] = (int)floor(offset[i]);
+    rf->phases[i] = offset[i] - intpel[i];
+  }
+  offset[rf->p] = rf->q + x0;
+  intpel[rf->p] = (int)floor(offset[rf->p]);
+
+  rf->start = intpel[0];
+  for (int i = 0; i < rf->p; ++i) rf->steps[i] = intpel[i + 1] - intpel[i];
+  if (rf->p < rf->q) {  // downsampling
+    rf->length = get_lanczos_downsampler_filter_length(rf->p, rf->q, a);
+    if (rf->length > MAX_FILTER_LEN) {
+      fprintf(stderr, "Filter length %d ratio exceeds maximum allowed\n",
+              rf->length);
+      return 0;
+    }
+    for (int i = 0; i < rf->p; ++i) {
+      get_lanczos_downsampler(rf->phases[i], rf->p, rf->q, a, rf->filter_bits,
+                              rf->win_type, rf->filter[i]);
+    }
+  } else if (rf->p >= rf->q) {  // upsampling
+    rf->length = get_lanczos_upsampler_filter_length(rf->p, rf->q, a);
+    if (rf->length > MAX_FILTER_LEN) {
+      fprintf(stderr, "Filter length %d ratio exceeds maximum allowed\n",
+              rf->length);
+      return 0;
+    }
+    for (int i = 0; i < rf->p; ++i) {
+      get_lanczos_upsampler(rf->phases[i], rf->p, rf->q, a, rf->filter_bits,
+                            rf->win_type, rf->filter[i]);
+    }
+  }
+  return 1;
+}
+
+int is_resampler_noop(RationalResampleFilter *rf) {
+  return (rf->p == 1 && rf->q == 1 && rf->phases[0] == 0.0);
+}
+
+int get_resample_filter_inv(int p, int q, int a, double x0, EXT_TYPE ext_type,
+                            WIN_TYPE win_type, int subsampled, int bits,
+                            RationalResampleFilter *rf) {
+  double y0 = get_inverse_x0(p, q, x0, subsampled);
+  return get_resample_filter(q, p, a, y0, ext_type, win_type, subsampled, bits,
+                             rf);
+}
+
+// Assume x buffer is already extended on both sides with x pointing to the
+// leftmost pixel, and the extension values are already filled up.
+static void resample_1d_core(const int16_t *x, int inlen,
+                             RationalResampleFilter *rf, int downshift,
+                             ClipProfile *clip, int16_t *y, int outlen) {
+  (void)inlen;
+  const int tapsby2 = rf->length / 2;
+  const int16_t *xext = x;
+  xext += rf->start;
+  for (int i = 0, p = 0; i < outlen; ++i, p = (p + 1) % rf->p) {
+    int64_t sum = 0;
+    for (int j = -tapsby2 + 1; j <= tapsby2; ++j) {
+      sum += (int)rf->filter[p][j + tapsby2 - 1] * (int)xext[j];
+    }
+    sum = ROUND_POWER_OF_TWO_SIGNED(sum, downshift);
+    if (clip) {
+      y[i] = (int16_t)(clip->issigned
+                           ? doclip((int)sum, -(1 << (clip->bits - 1)),
+                                    (1 << (clip->bits - 1)) - 1)
+                           : doclip((int)sum, 0, (1 << clip->bits) - 1));
+    } else {
+      y[i] = (int16_t)doclip((int)sum, -(1 << 15), (1 << 15) - 1);
+    }
+    xext += rf->steps[p];
+  }
+}
+
+static void extend_border(int16_t *x, int inlen, EXT_TYPE ext_type,
+                          int border) {
+  switch (ext_type) {
+    case EXT_REPEAT:
+      for (int i = -border; i < 0; ++i) x[i] = x[0];
+      for (int i = 0; i < border; ++i) x[i + inlen] = x[inlen - 1];
+      break;
+    case EXT_SYMMETRIC:
+      if (inlen >= border) {
+        for (int i = -border; i < 0; ++i) x[i] = x[-i - 1];
+        for (int i = 0; i < border; ++i) x[i + inlen] = x[inlen - 1 - i];
+      } else {
+        for (int i = -border; i < 0; ++i)
+          x[i] = x[(-i - 1 > inlen - 1 ? inlen - 1 : -i - 1)];
+        for (int i = 0; i < border; ++i)
+          x[i + inlen] = x[(inlen - 1 - i < 0 ? 0 : inlen - 1 - i)];
+      }
+      break;
+    case EXT_REFLECT:
+      if (inlen > border) {
+        for (int i = -border; i < 0; ++i) x[i] = x[-i];
+        for (int i = 0; i < border; ++i) x[i + inlen] = x[inlen - 2 - i];
+      } else {
+        for (int i = -border; i < 0; ++i)
+          x[i] = x[(-i > inlen - 1 ? inlen - 1 : -i)];
+        for (int i = 0; i < border; ++i)
+          x[i + inlen] = x[(inlen - 2 - i < 0 ? 0 : inlen - 2 - i)];
+      }
+      break;
+    case EXT_GRADIENT:
+      if (inlen > border) {
+        for (int i = -border; i < 0; ++i) x[i] = 2 * x[0] - x[-i];
+        for (int i = 0; i < border; ++i)
+          x[i + inlen] = 2 * x[inlen - 1] - x[inlen - 2 - i];
+      } else {
+        for (int i = -border; i < 0; ++i)
+          x[i] = 2 * x[0] - x[(-i > inlen - 1 ? inlen - 1 : -i)];
+        for (int i = 0; i < border; ++i)
+          x[i + inlen] =
+              2 * x[inlen - 1] - x[(inlen - 2 - i < 0 ? 0 : inlen - 2 - i)];
+      }
+      break;
+  }
+}
+
+// Assume x buffer is already extended on both sides with x pointing to the
+// leftmost pixel, but the extension values are not filled up.
+static void resample_1d_xt(int16_t *x, int inlen, RationalResampleFilter *rf,
+                           int downshift, ClipProfile *clip, int16_t *y,
+                           int outlen) {
+  extend_border(x, inlen, rf->ext_type, rf->length / 2);
+  resample_1d_core(x, inlen, rf, downshift, clip, y, outlen);
+}
+
+// Assume a scratch buffer xext of size inlen + rf->length is provided
+static void resample_1d_xc(const int16_t *x, int inlen,
+                           RationalResampleFilter *rf, int downshift,
+                           ClipProfile *clip, int16_t *y, int outlen,
+                           int16_t *xext) {
+  memcpy(xext, x, sizeof(*x) * inlen);
+
+  resample_1d_xt(xext, inlen, rf, downshift, clip, y, outlen);
+}
+
+static void fill_col_to_arr(const int16_t *img, int stride, int len,
+                            int16_t *arr) {
+  int i;
+  const int16_t *iptr = img;
+  int16_t *aptr = arr;
+  for (i = 0; i < len; ++i, iptr += stride) {
+    *aptr++ = *iptr;
+  }
+}
+
+static void fill_arr_to_col(int16_t *img, int stride, int len,
+                            const int16_t *arr) {
+  int i;
+  int16_t *iptr = img;
+  const int16_t *aptr = arr;
+  for (i = 0; i < len; ++i, iptr += stride) {
+    *iptr = *aptr++;
+  }
+}
+
+void resample_1d(const int16_t *x, int inlen, RationalResampleFilter *rf,
+                 int downshift, ClipProfile *clip, int16_t *y, int outlen) {
+  const int tapsby2 = rf->length / 2;
+  int16_t *xext_ = (int16_t *)malloc((inlen + rf->length) * sizeof(*x));
+  int16_t *xext = xext_ + tapsby2;
+
+  // Resample the caller's input x; xext is scratch for the border extension.
+  resample_1d_xc(x, inlen, rf, downshift, clip, y, outlen, xext);
+
+  free(xext_);
+}
+
+void av1_resample_2d(const int16_t *x, int inwidth, int inheight, int instride,
+                     RationalResampleFilter *rfh, RationalResampleFilter *rfv,
+                     int int_extra_bits, ClipProfile *clip, int16_t *y,
+                     int outwidth, int outheight, int outstride) {
+  if (rfv == NULL || is_resampler_noop(rfv)) {
+    resample_horz(x, inwidth, inheight, instride, rfh, clip, y, outwidth,
+                  outstride);
+    return;
+  }
+  if (rfh == NULL || is_resampler_noop(rfh)) {
+    resample_vert(x, inwidth, inheight, instride, rfv, clip, y, outheight,
+                  outstride);
+    return;
+  }
+  int16_t *tmpbuf = (int16_t *)malloc(sizeof(int16_t) * outwidth * inheight);
+  const int arrsize =
+      outheight + ((inheight + rfv->length > inwidth + rfh->length)
+                       ? (inheight + rfv->length)
+                       : (inwidth + rfh->length));
+  int16_t *tmparr_ = (int16_t *)calloc(arrsize, sizeof(int16_t));
+  int16_t *tmparrh = tmparr_ + outheight + rfh->length / 2;
+  int16_t *tmparrv = tmparr_ + outheight + rfv->length / 2;
+  int16_t *tmparro = tmparr_;
+  int tmpstride = outwidth;
+  // intermediate data is stored in 16 bit buffers, so limit int_extra_bits
+  int_extra_bits = MIN(int_extra_bits, 14 - clip->bits);
+  const int downshifth = rfh->filter_bits - int_extra_bits;
+  const int downshiftv = rfv->filter_bits + int_extra_bits;  // remove extra precision in vertical pass
+  for (int i = 0; i < inheight; ++i) {
+    resample_1d_xc(x + instride * i, inwidth, rfh, downshifth, NULL,
+                   tmpbuf + i * tmpstride, outwidth, tmparrh);
+  }
+  for (int i = 0; i < outwidth; ++i) {
+    fill_col_to_arr(tmpbuf + i, outwidth, inheight, tmparrv);
+    resample_1d_xt(tmparrv, inheight, rfv, downshiftv, clip, tmparro,
+                   outheight);
+    fill_arr_to_col(y + i, outstride, outheight, tmparro);
+  }
+  free(tmpbuf);
+  free(tmparr_);
+}
+
+void resample_horz(const int16_t *x, int inwidth, int inheight, int instride,
+                   RationalResampleFilter *rfh, ClipProfile *clip, int16_t *y,
+                   int outwidth, int outstride) {
+  const int arrsize = inwidth + rfh->length;
+  int16_t *tmparr_ = (int16_t *)calloc(arrsize, sizeof(int16_t));
+  int16_t *tmparrh = tmparr_ + rfh->length / 2;
+  for (int i = 0; i < inheight; ++i) {
+    resample_1d_xc(x + instride * i, inwidth, rfh, rfh->filter_bits, clip,
+                   y + i * outstride, outwidth, tmparrh);
+  }
+  free(tmparr_);
+}
+
+void resample_vert(const int16_t *x, int inwidth, int inheight, int instride,
+                   RationalResampleFilter *rfv, ClipProfile *clip, int16_t *y,
+                   int outheight, int outstride) {
+  const int arrsize = outheight + inheight + rfv->length;
+  int16_t *tmparr_ = (int16_t *)calloc(arrsize, sizeof(int16_t));
+  int16_t *tmparrv = tmparr_ + outheight + rfv->length / 2;
+  int16_t *tmparro = tmparr_;
+  for (int i = 0; i < inwidth; ++i) {
+    fill_col_to_arr(x + i, instride, inheight, tmparrv);
+    resample_1d_xt(tmparrv, inheight, rfv, rfv->filter_bits, clip, tmparro,
+                   outheight);
+    fill_arr_to_col(y + i, outstride, outheight, tmparro);
+  }
+  free(tmparr_);
+}
+
+int get_resampled_output_length(int inlen, int p, int q, int force_even) {
+  if (!force_even) {
+    // round
+    return (inlen * p + q / 2) / q;
+  }
+  int outlen_floor = inlen * p / q;
+  // choose floor or ceil depending on which one is even
+  if ((outlen_floor % 2) == 1)
+    return outlen_floor + 1;
+  else
+    return outlen_floor;
+}
diff --git a/av1/common/lanczos_resample.h b/av1/common/lanczos_resample.h
new file mode 100644
index 0000000..08d9d97
--- /dev/null
+++ b/av1/common/lanczos_resample.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <math.h>
+
+#define MAX_RATIONAL_FACTOR 16
+#define MAX_FILTER_LEN 320
+
+// Note: check window() function implementation for values of any
+// other params used by these windowing functions.
+typedef enum {
+  WIN_LANCZOS,      // Sinc window (i.e. Lanczos)
+  WIN_LANCZOS_DIL,  // Dilated Lanczos window
+  WIN_GAUSSIAN,     // Gaussian window
+  WIN_GENGAUSSIAN,  // Generalized Gaussian window
+  WIN_COSINE,       // Cosine window
+  WIN_HAMMING,      // Hamming Window
+  WIN_BLACKMAN,     // Blackman window
+  WIN_KAISER,       // Kaiser window
+} WIN_TYPE;
+
+typedef enum { EXT_REPEAT, EXT_SYMMETRIC, EXT_REFLECT, EXT_GRADIENT } EXT_TYPE;
+
+typedef struct {
+  int p;
+  int q;
+  int length;
+  EXT_TYPE ext_type;
+  WIN_TYPE win_type;
+  int filter_bits;
+  int start;
+  int steps[MAX_RATIONAL_FACTOR];
+  int16_t filter[MAX_RATIONAL_FACTOR][MAX_FILTER_LEN];
+  double phases[MAX_RATIONAL_FACTOR];
+} RationalResampleFilter;
+
+typedef struct {
+  int bits;
+  int issigned;
+} ClipProfile;
+
+// TODO(yuec): move functions not called by other files to lanczos_resample.c
+// TODO(yuec): consolidate this library with the copy in tools/lanczos
+double get_centered_x0(int p, int q);
+
+// x0 is assumed to be in (-1, 1)
+double get_inverse_x0_numeric(int p, int q, double x0);
+
+// In the functions below using x0 as an argument,
+// x0 is assumed to be in (-1, 1);
+//                        or 99 (ascii value of 'c') meaning centered;
+//                        or 100 (ascii value of 'd') meaning co-sited chroma
+//                        if the chroma plane is subsampled.
+double get_inverse_x0(int p, int q, double x0, int subsampled);
+
+int get_resample_filter(int p, int q, int a, double x0, EXT_TYPE ext_type,
+                        WIN_TYPE win_type, int subsampled, int bits,
+                        RationalResampleFilter *rf);
+int get_resample_filter_inv(int p, int q, int a, double x0, EXT_TYPE ext_type,
+                            WIN_TYPE win_type, int subsampled, int bits,
+                            RationalResampleFilter *rf);
+
+// whether the resampler filter is a no-op
+int is_resampler_noop(RationalResampleFilter *rf);
+
+// 16-bit versions of high-level resampling functions
+
+// Assume no extension of the input x buffer
+void resample_1d(const int16_t *x, int inlen, RationalResampleFilter *rf,
+                 int downshift, ClipProfile *clip, int16_t *y, int outlen);
+
+void av1_resample_2d(const int16_t *x, int inwidth, int inheight, int instride,
+                     RationalResampleFilter *rfh, RationalResampleFilter *rfv,
+                     int int_extra_bits, ClipProfile *clip, int16_t *y,
+                     int outwidth, int outheight, int outstride);
+
+void resample_horz(const int16_t *x, int inwidth, int inheight, int instride,
+                   RationalResampleFilter *rfh, ClipProfile *clip, int16_t *y,
+                   int outwidth, int outstride);
+
+void resample_vert(const int16_t *x, int inwidth, int inheight, int instride,
+                   RationalResampleFilter *rfv, ClipProfile *clip, int16_t *y,
+                   int outheight, int outstride);
+
+void show_resample_filter(RationalResampleFilter *rf);
+
+int get_resampled_output_length(int inlen, int p, int q, int force_even);
+const char *ext2str(EXT_TYPE ext_type);
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index dd83e31..df8182a 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -3276,8 +3276,19 @@
   int ref_abs_offset[REF_FRAMES] = { 0 };
   int has_bwd_ref = 0;
 
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  const int is_scaled = (start_frame_buf->width != cm->width ||
+                         start_frame_buf->height != cm->height);
+  struct scale_factors sf_;
+  // Inverse scale factor
+  av1_setup_scale_factors_for_frame(&sf_, cm->width, cm->height,
+                                    start_frame_buf->width,
+                                    start_frame_buf->height);
+  const struct scale_factors *sf = &sf_;
+#else
   assert(start_frame_buf->width == cm->width &&
          start_frame_buf->height == cm->height);
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
 
 #if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   const int *const ref_order_hints =
@@ -3304,8 +3315,34 @@
   const int mvs_cols =
       ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, TMVP_SHIFT_BITS);
   const int mvs_stride = mvs_cols;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  uint32_t scaled_blk_col_hr_0 = 0;
+  uint32_t scaled_blk_col_hr_step = 0;
+  uint32_t scaled_blk_col_hr = 0;
+  uint32_t scaled_blk_row_hr_0 = 0;
+  uint32_t scaled_blk_row_hr_step = 0;
+  uint32_t scaled_blk_row_hr = 0;
+  if (is_scaled) {
+    scaled_blk_col_hr_0 =
+        (uint32_t)sf->x_scale_fp * 4;  // center of first block
+    scaled_blk_col_hr_step = (uint32_t)sf->x_scale_fp * 8;  // step
+    scaled_blk_row_hr_0 =
+        (uint32_t)sf->y_scale_fp * 4;  // center of first block
+    scaled_blk_row_hr_step = (uint32_t)sf->y_scale_fp * 8;  // step
+    scaled_blk_row_hr = scaled_blk_row_hr_0;
+  }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   const int enable_compound_mv = cm->seq_params.enable_tip;
   for (int blk_row = 0; blk_row < mvs_rows; ++blk_row) {
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    int scaled_blk_row = blk_row;
+    if (is_scaled) {
+      scaled_blk_col_hr = scaled_blk_col_hr_0;
+      scaled_blk_row =
+          ROUND_POWER_OF_TWO(scaled_blk_row_hr, REF_SCALE_SHIFT + 3);
+      scaled_blk_row = AOMMIN(scaled_blk_row, mvs_rows - 1);
+    }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     for (int blk_col = 0; blk_col < mvs_cols; ++blk_col) {
       MV_REF *mv_ref = &mv_ref_base[blk_row * mvs_cols + blk_col];
       for (int idx = 0; idx < 1 + enable_compound_mv; ++idx) {
@@ -3316,6 +3353,16 @@
                           ref_abs_offset[ref_frame] <= MAX_FRAME_DISTANCE;
           if (pos_valid) {
             MV ref_mv = mv_ref->mv[idx].as_mv;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+            int scaled_blk_col = blk_col;
+            if (is_scaled) {
+              scaled_blk_col =
+                  ROUND_POWER_OF_TWO(scaled_blk_col_hr, REF_SCALE_SHIFT + 3);
+              scaled_blk_col = AOMMIN(scaled_blk_col, mvs_cols - 1);
+              ref_mv.row = sf->scale_value_y_gen(ref_mv.row, sf);
+              ref_mv.col = sf->scale_value_x_gen(ref_mv.col, sf);
+            }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
             int_mv this_mv;
             int mi_r = blk_row;
             int mi_c = blk_col;
@@ -3341,7 +3388,13 @@
           }
         }
       }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+      if (is_scaled) scaled_blk_col_hr += scaled_blk_col_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    if (is_scaled) scaled_blk_row_hr += scaled_blk_row_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   }
 
   return 1;
@@ -3375,9 +3428,19 @@
 
   int temporal_scale_factor[REF_FRAMES] = { 0 };
   int ref_abs_offset[REF_FRAMES] = { 0 };
-
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  const int is_scaled = (start_frame_buf->width != cm->width ||
+                         start_frame_buf->height != cm->height);
+  struct scale_factors sf_;
+  // Inverse scale factor
+  av1_setup_scale_factors_for_frame(&sf_, cm->width, cm->height,
+                                    start_frame_buf->width,
+                                    start_frame_buf->height);
+  const struct scale_factors *sf = &sf_;
+#else
   assert(start_frame_buf->width == cm->width &&
          start_frame_buf->height == cm->height);
+#endif
 
 #if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   const int *const ref_order_hints =
@@ -3402,8 +3465,34 @@
   const int mvs_cols =
       ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, TMVP_SHIFT_BITS);
   const int mvs_stride = mvs_cols;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  uint32_t scaled_blk_col_hr_0 = 0;
+  uint32_t scaled_blk_col_hr_step = 0;
+  uint32_t scaled_blk_col_hr = 0;
+  uint32_t scaled_blk_row_hr_0 = 0;
+  uint32_t scaled_blk_row_hr_step = 0;
+  uint32_t scaled_blk_row_hr = 0;
+  if (is_scaled) {
+    scaled_blk_col_hr_0 =
+        (uint32_t)sf->x_scale_fp * 4;  // center of first block
+    scaled_blk_col_hr_step = (uint32_t)sf->x_scale_fp * 8;  // step
+    scaled_blk_row_hr_0 =
+        (uint32_t)sf->y_scale_fp * 4;  // center of first block
+    scaled_blk_row_hr_step = (uint32_t)sf->y_scale_fp * 8;  // step
+    scaled_blk_row_hr = scaled_blk_row_hr_0;
+  }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   const int enable_compound_mv = cm->seq_params.enable_tip;
   for (int blk_row = 0; blk_row < mvs_rows; ++blk_row) {
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    int scaled_blk_row = blk_row;
+    if (is_scaled) {
+      scaled_blk_col_hr = scaled_blk_col_hr_0;
+      scaled_blk_row =
+          ROUND_POWER_OF_TWO(scaled_blk_row_hr, REF_SCALE_SHIFT + 3);
+      scaled_blk_row = AOMMIN(scaled_blk_row, mvs_rows - 1);
+    }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     for (int blk_col = 0; blk_col < mvs_cols; ++blk_col) {
       MV_REF *mv_ref = &mv_ref_base[blk_row * mvs_cols + blk_col];
       for (int idx = 0; idx < 1 + enable_compound_mv; ++idx) {
@@ -3414,6 +3503,16 @@
                           ref_abs_offset[ref_frame] <= MAX_FRAME_DISTANCE;
           if (pos_valid) {
             MV ref_mv = mv_ref->mv[idx].as_mv;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+            int scaled_blk_col = blk_col;
+            if (is_scaled) {
+              scaled_blk_col =
+                  ROUND_POWER_OF_TWO(scaled_blk_col_hr, REF_SCALE_SHIFT + 3);
+              scaled_blk_col = AOMMIN(scaled_blk_col, mvs_cols - 1);
+              ref_mv.row = sf->scale_value_y_gen(ref_mv.row, sf);
+              ref_mv.col = sf->scale_value_x_gen(ref_mv.col, sf);
+            }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
             int_mv this_mv;
             int mi_r = blk_row;
             int mi_c = blk_col;
@@ -3435,7 +3534,13 @@
           }
         }
       }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+      if (is_scaled) scaled_blk_col_hr += scaled_blk_col_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    if (is_scaled) scaled_blk_row_hr += scaled_blk_row_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   }
 
   return 1;
@@ -3489,12 +3594,59 @@
   MV_REF *mv_ref_base = start_frame_buf->mvs;
   const int mvs_rows = (cm->mi_params.mi_rows + 1) >> 1;
   const int mvs_cols = (cm->mi_params.mi_cols + 1) >> 1;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  const int is_scaled = (start_frame_buf->width != cm->width ||
+                         start_frame_buf->height != cm->height);
+  struct scale_factors sf_;
+  // Inverse scale factor
+  av1_setup_scale_factors_for_frame(&sf_, cm->width, cm->height,
+                                    start_frame_buf->width,
+                                    start_frame_buf->height);
+  const struct scale_factors *sf = &sf_;
+  const int start_mvs_rows = (start_frame_buf->mi_rows + 1) >> 1;
+  const int start_mvs_cols = (start_frame_buf->mi_cols + 1) >> 1;
+  uint32_t scaled_blk_col_hr_0 = 0;
+  uint32_t scaled_blk_col_hr_step = 0;
+  uint32_t scaled_blk_col_hr = 0;
+  uint32_t scaled_blk_row_hr_0 = 0;
+  uint32_t scaled_blk_row_hr_step = 0;
+  uint32_t scaled_blk_row_hr = 0;
+  if (is_scaled) {
+    scaled_blk_col_hr_0 =
+        (uint32_t)sf->x_scale_fp * 4;  // center of first block
+    scaled_blk_col_hr_step = (uint32_t)sf->x_scale_fp * 8;  // step
+    scaled_blk_row_hr_0 =
+        (uint32_t)sf->y_scale_fp * 4;  // center of first block
+    scaled_blk_row_hr_step = (uint32_t)sf->y_scale_fp * 8;  // step
+    scaled_blk_row_hr = scaled_blk_row_hr_0;
+  }
+#else
+  assert(start_frame_buf->width == cm->width &&
+         start_frame_buf->height == cm->height);
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
 
   for (int blk_row = 0; blk_row < mvs_rows; ++blk_row) {
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    if (is_scaled) {
+      scaled_blk_col_hr = scaled_blk_col_hr_0;
+      scaled_blk_row =
+          ROUND_POWER_OF_TWO(scaled_blk_row_hr, REF_SCALE_SHIFT + 3);
+      scaled_blk_row = AOMMIN(scaled_blk_row, mvs_rows - 1);
+    }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     for (int blk_col = 0; blk_col < mvs_cols; ++blk_col) {
       MV_REF *mv_ref = &mv_ref_base[blk_row * mvs_cols + blk_col];
       MV fwd_mv = mv_ref->mv.as_mv;
 
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+      if (is_scaled) {
+        fwd_mv.row = sf->scale_value_y_gen(fwd_mv.row, sf);
+        fwd_mv.col = sf->scale_value_x_gen(fwd_mv.col, sf);
+        scaled_blk_col =
+            ROUND_POWER_OF_TWO(scaled_blk_col_hr, REF_SCALE_SHIFT + 3);
+        scaled_blk_col = AOMMIN(scaled_blk_col, mvs_cols - 1);
+      }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
       if (is_inter_ref_frame(mv_ref->ref_frame)) {
         int_mv this_mv;
         int mi_r, mi_c;
@@ -3526,7 +3678,13 @@
           }
         }
       }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+      if (is_scaled) scaled_blk_col_hr += scaled_blk_col_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    if (is_scaled) scaled_blk_row_hr += scaled_blk_row_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   }
 
   return 1;
@@ -3574,11 +3732,58 @@
   MV_REF *mv_ref_base = start_frame_buf->mvs;
   const int mvs_rows = (cm->mi_params.mi_rows + 1) >> 1;
   const int mvs_cols = (cm->mi_params.mi_cols + 1) >> 1;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  const int is_scaled = (start_frame_buf->width != cm->width ||
+                         start_frame_buf->height != cm->height);
+  struct scale_factors sf_;
+  // Inverse scale factor
+  av1_setup_scale_factors_for_frame(&sf_, cm->width, cm->height,
+                                    start_frame_buf->width,
+                                    start_frame_buf->height);
+  const struct scale_factors *sf = &sf_;
+  const int start_mvs_rows = (start_frame_buf->mi_rows + 1) >> 1;
+  const int start_mvs_cols = (start_frame_buf->mi_cols + 1) >> 1;
+  uint32_t scaled_blk_col_hr_0 = 0;
+  uint32_t scaled_blk_col_hr_step = 0;
+  uint32_t scaled_blk_col_hr = 0;
+  uint32_t scaled_blk_row_hr_0 = 0;
+  uint32_t scaled_blk_row_hr_step = 0;
+  uint32_t scaled_blk_row_hr = 0;
+  if (is_scaled) {
+    scaled_blk_col_hr_0 =
+        (uint32_t)sf->x_scale_fp * 4;  // center of first block
+    scaled_blk_col_hr_step = (uint32_t)sf->x_scale_fp * 8;  // step
+    scaled_blk_row_hr_0 =
+        (uint32_t)sf->y_scale_fp * 4;  // center of first block
+    scaled_blk_row_hr_step = (uint32_t)sf->y_scale_fp * 8;  // step
+    scaled_blk_row_hr = scaled_blk_row_hr_0;
+  }
+#else
+  assert(start_frame_buf->width == cm->width &&
+         start_frame_buf->height == cm->height);
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
 
   for (int blk_row = 0; blk_row < mvs_rows; ++blk_row) {
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    if (is_scaled) {
+      scaled_blk_col_hr = scaled_blk_col_hr_0;
+      scaled_blk_row =
+          ROUND_POWER_OF_TWO(scaled_blk_row_hr, REF_SCALE_SHIFT + 3);
+      scaled_blk_row = AOMMIN(scaled_blk_row, mvs_rows - 1);
+    }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     for (int blk_col = 0; blk_col < mvs_cols; ++blk_col) {
       MV_REF *mv_ref = &mv_ref_base[blk_row * mvs_cols + blk_col];
       MV fwd_mv = mv_ref->mv.as_mv;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+      if (is_scaled) {
+        fwd_mv.row = sf->scale_value_y_gen(fwd_mv.row, sf);
+        fwd_mv.col = sf->scale_value_x_gen(fwd_mv.col, sf);
+        scaled_blk_col =
+            ROUND_POWER_OF_TWO(scaled_blk_col_hr, REF_SCALE_SHIFT + 3);
+        scaled_blk_col = AOMMIN(scaled_blk_col, mvs_cols - 1);
+      }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
 
       if (is_inter_ref_frame(mv_ref->ref_frame)) {
         int_mv this_mv;
@@ -3607,7 +3812,13 @@
           }
         }
       }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+      if (is_scaled) scaled_blk_col_hr += scaled_blk_col_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    if (is_scaled) scaled_blk_row_hr += scaled_blk_row_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   }
 
   return 1;
diff --git a/av1/common/pef.c b/av1/common/pef.c
index c5587dd..92ef8c5 100644
--- a/av1/common/pef.c
+++ b/av1/common/pef.c
@@ -21,6 +21,9 @@
 #include "av1/common/av1_loopfilter.h"
 #include "av1/common/reconinter.h"
 #include "av1/common/seg_common.h"
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+#include "av1/common/resize.h"
+#endif
 static const int pef_w_mult[4] = { 85, 51, 37, 28 };
 static const int pef_q_mult[4] = { 32, 25, 19, 19 };
 
@@ -477,6 +480,18 @@
                     &pef_input);
     enhance_sub_prediction_blocks(cm, xd, &pef_input);
   }
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  if (av1_superres_scaled(cm)) {
+    // Upscale tip_frame and store in upsampled_tip_frame_buf
+#if CONFIG_2D_SR
+    av1_upscale_normative_2d_and_extend_frame(
+        cm, &cm->tip_ref.tip_frame->buf, &cm->tip_ref.upscaled_tip_frame_buf);
+#else
+    av1_upscale_normative_and_extend_frame(cm, &cm->tip_ref.tip_frame->buf,
+                                           &cm->tip_ref.upscaled_tip_frame_buf);
+#endif  // CONFIG_2D_SR
+  }
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 }
 #endif  // CONFIG_TIP
 
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index a8c54e2..a41a740 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -42,11 +42,19 @@
   // Note: As per the spec, we must test the fixed point scales here, which are
   // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
   // have 1 << 10 precision).
-  if (av1_is_scaled(sf)) return 0;
+#if !CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+  if (av1_is_scaled(sf)) {
+//    printf("\nav1_is_scaled(sf):");
+    return 0;
+  } 
+#endif
 
   if (final_warp_params != NULL) *final_warp_params = default_warp_params;
 
-  if (build_for_obmc) return 0;
+  if (build_for_obmc) {
+//    printf("\nbuild_for_obmc:");
+    return 0;
+  }
 
 #if CONFIG_EXTENDED_WARP_PREDICTION
   if (warp_types->local_warp_allowed && !mbmi->wm_params[ref].invalid) {
@@ -66,6 +74,9 @@
     return 1;
   }
 
+//  printf("warp_types->local_warp_allowed? %d",  warp_types->local_warp_allowed);
+//  printf("mbmi->wm_params[%d].invalid? %d",  ref, mbmi->wm_params[ref].invalid);
+//  printf("\nav1_allow_warp:");
   return 0;
 }
 
@@ -121,6 +132,79 @@
   }
 }
 
+#if CONFIG_2D_SR_MC_PHASE_FIX
+void av1_init_phase_offset(InterPredParams *inter_pred_params,
+                           const struct AV1Common *const cm) {
+  for (int plane_type = 0; plane_type < 2; plane_type++) {
+    inter_pred_params->posx_offset[plane_type] = 0;
+    inter_pred_params->posy_offset[plane_type] = 0;
+  }
+#if CONFIG_2D_SR_ZERO_PHASE
+  // phase offsets for luma and chroma, both directions, with 6x upsampling
+  if (cm->superres_scale_denominator == 24) {
+    inter_pred_params->posx_offset[0] = -512;
+    inter_pred_params->posy_offset[0] = -512;
+    inter_pred_params->posx_offset[1] = -512;
+    inter_pred_params->posy_offset[1] = -512;
+  }
+  // phase offsets for luma and chroma, both directions, with 4x upsampling
+  if (cm->superres_scale_denominator == 16) {
+    inter_pred_params->posx_offset[0] = -512;
+    inter_pred_params->posy_offset[0] = -512;
+    inter_pred_params->posx_offset[1] = -512;
+    inter_pred_params->posy_offset[1] = -512;
+  }
+  // phase offsets for luma and chroma, both directions, with 3x upsampling
+  if (cm->superres_scale_denominator == 12) {
+    inter_pred_params->posx_offset[0] = 0;
+    inter_pred_params->posy_offset[0] = 0;
+    inter_pred_params->posx_offset[1] = 0;
+    inter_pred_params->posy_offset[1] = 0;
+  }
+  // phase offsets for luma and chroma, both directions, with 2x upsampling
+  if (cm->superres_scale_denominator == 8) {
+    inter_pred_params->posx_offset[0] = -512;
+    inter_pred_params->posy_offset[0] = -512;
+    inter_pred_params->posx_offset[1] = -512;
+    inter_pred_params->posy_offset[1] = -512;
+  }
+  // phase offsets for luma and chroma, both directions, with 1.5x upsampling
+  if (cm->superres_scale_denominator == 6) {
+    inter_pred_params->posx_offset[0] = -256;
+    inter_pred_params->posy_offset[0] = -256;
+    inter_pred_params->posx_offset[1] = -256;
+    inter_pred_params->posy_offset[1] = -256;
+  }
+#else
+  // phase offset for chroma horizontal filter when using 6x upsampling
+  if (cm->superres_scale_denominator == 24) {
+    inter_pred_params->posx_offset[1] = -213;
+  }
+  // phase offset for chroma horizontal filter when using 4x upsampling
+  if (cm->superres_scale_denominator == 16) {
+    inter_pred_params->posx_offset[1] = -192;
+  }
+  // phase offset for chroma horizontal filter when using 3x upsampling
+  if (cm->superres_scale_denominator == 12) {
+    inter_pred_params->posx_offset[1] = -171;
+  }
+  // phase offset for chroma horizontal filter when using 2x upsampling
+  if (cm->superres_scale_denominator == 8) {
+    inter_pred_params->posx_offset[1] = -128;
+  }
+  // phase offset for chroma horizontal filter when using 1.5x upsampling
+  if (cm->superres_scale_denominator == 6) {
+    inter_pred_params->posx_offset[1] = -85;
+  }
+#endif
+
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+  inter_pred_params->mi_rows = cm->mi_params.mi_rows;
+  inter_pred_params->mi_cols = cm->mi_params.mi_cols;
+#endif
+}
+#endif
+
 void av1_init_comp_mode(InterPredParams *inter_pred_params) {
   inter_pred_params->comp_mode = UNIFORM_COMP;
 }
@@ -148,8 +232,13 @@
                      ref,
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
                      0, inter_pred_params->scale_factors,
-                     &inter_pred_params->warp_params))
+                     &inter_pred_params->warp_params)) {
     inter_pred_params->mode = WARP_PRED;
+//    printf("\nav1_allow_warp : allowed\n");
+  } 
+//  else {
+//        printf("\nav1_allow_warp : disallowed\n");
+//  }
 }
 
 void av1_make_inter_predictor(const uint16_t *src, int src_stride,
@@ -170,7 +259,12 @@
         inter_pred_params->pix_col, inter_pred_params->pix_row,
         inter_pred_params->block_width, inter_pred_params->block_height,
         dst_stride, inter_pred_params->subsampling_x,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+        inter_pred_params->subsampling_y, &inter_pred_params->conv_params,
+        subpel_params);
+#else
         inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
+#endif        
   } else if (inter_pred_params->mode == TRANSLATION_PRED) {
     highbd_inter_predictor(
         src, src_stride, dst, dst_stride, subpel_params,
@@ -927,6 +1021,9 @@
   av1_init_inter_params(inter_pred_params, bw, bh, pre_y, pre_x,
                         pd->subsampling_x, pd->subsampling_y, xd->bd,
                         mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+  av1_init_phase_offset(inter_pred_params, cm);
+#endif
 #if CONFIG_REFINEMV
   inter_pred_params->original_pu_width = pu_width;
   inter_pred_params->original_pu_height = pu_height;
@@ -1658,6 +1755,7 @@
                                 &inter_pred_params->conv_params,
                                 inter_pred_params->bit_depth);
 }
+
 #if !CONFIG_D071_IMP_MSK_BLD
 static
 #endif
@@ -1713,17 +1811,39 @@
     BacpBlockData *b_data_1 =
         &inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + 1];
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    const struct scale_factors *sf = inter_pred_params->scale_factors;
+
+    int scale_mult = 1, scale_offset = 0, scale_rshift = 0;
+    if (sf->x_scale_fp == REF_NO_SCALE)               {scale_mult = 1; scale_offset = 0; scale_rshift = 0;} // 1.0
+    //if (sf->x_scale_fp == 1.5*(1 << REF_SCALE_SHIFT)) {scale_mult = 3; scale_offset = 1; scale_rshift = 1;} // 1.5
+    if (sf->x_scale_fp == REF_2x_SCALE)               {scale_mult = 2; scale_offset = 0; scale_rshift = 0;} // 2.0
+    if (sf->x_scale_fp == REF_3x_SCALE)               {scale_mult = 3; scale_offset = 0; scale_rshift = 0;} // 3.0;
+    if (sf->x_scale_fp == REF_4x_SCALE)               {scale_mult = 4; scale_offset = 0; scale_rshift = 0;} // 4.0;
+    if (sf->x_scale_fp == REF_6x_SCALE)               {scale_mult = 6; scale_offset = 0; scale_rshift = 0;} // 6.0;
+#endif
+
     for (int i = 0; i < inter_pred_params->block_height; ++i) {
       for (int j = 0; j < inter_pred_params->block_width; ++j) {
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+        int x = b_data_0->x0 + ((scale_mult * j + scale_offset) >> scale_rshift);
+        int y = b_data_0->y0 + ((scale_mult * i + scale_offset) >> scale_rshift);
+#else
         int x = b_data_0->x0 + j;
         int y = b_data_0->y0 + i;
+#endif
 
         int p0_available =
             (x >= 0 && x < inter_pred_params->ref_frame_buf.width && y >= 0 &&
              y < inter_pred_params->ref_frame_buf.height);
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+        x = b_data_1->x0 + ((scale_mult * j + scale_offset) >> scale_rshift);
+        y = b_data_1->y0 + ((scale_mult * i + scale_offset) >> scale_rshift);
+#else
         x = b_data_1->x0 + j;
         y = b_data_1->y0 + i;
+#endif
         int p1_available =
             (x >= 0 && x < inter_pred_params->ref_frame_buf.width && y >= 0 &&
              y < inter_pred_params->ref_frame_buf.height);
@@ -1739,6 +1859,7 @@
       mask += mask_stride;
     }
   }
+
 #endif  // CONFIG_D071_IMP_MSK_BLD
 
   build_masked_compound_no_round(
@@ -1946,7 +2067,11 @@
                             uint16_t *recon_left, int rec_stride,
                             uint16_t *ref_top, uint16_t *ref_left,
                             int ref_stride, int ref, int plane, int bw,
-                            int bh) {
+#if CONFIG_2D_SR_BAWP_FIX
+	                        int bh, const struct scale_factors *sf) {
+#else
+	                        int bh) {
+#endif
   MB_MODE_INFO *mbmi = xd->mi[0];
   assert(mbmi->bawp_flag == 1);
   // only integer position of reference, may need to consider
@@ -1955,26 +2080,69 @@
   int sum_x = 0, sum_y = 0, sum_xy = 0, sum_xx = 0;
 
   if (xd->up_available) {
-    for (int i = 0; i < bw; ++i) {
-      sum_x += ref_top[i];
-      sum_y += recon_top[i];
-      sum_xy += ref_top[i] * recon_top[i];
-      sum_xx += ref_top[i] * ref_top[i];
-    }
+#if CONFIG_2D_SR_BAWP_FIX
+	  if (sf->x_scale_fp != REF_NO_SCALE) {
+		  for (int i = 0; i < bw; i++) {
+			  int idx = sf->scale_value_x_gen(i, sf);
+			  sum_x += ref_top[idx];
+			  sum_y += recon_top[i];
+			  sum_xy += ref_top[idx] * recon_top[i];
+			  sum_xx += ref_top[idx] * ref_top[idx];
+		  }
+	  }
+	  else {
+		  for (int i = 0; i < bw; ++i) {
+			  sum_x += ref_top[i];
+			  sum_y += recon_top[i];
+			  sum_xy += ref_top[i] * recon_top[i];
+			  sum_xx += ref_top[i] * ref_top[i];
+		  }
+	  }
+#else
+	  for (int i = 0; i < bw; ++i) {
+		  sum_x += ref_top[i];
+		  sum_y += recon_top[i];
+		  sum_xy += ref_top[i] * recon_top[i];
+		  sum_xx += ref_top[i] * ref_top[i];
+	  }
+#endif
     count += bw;
   }
 
   if (xd->left_available) {
-    for (int i = 0; i < bh; ++i) {
-      sum_x += ref_left[0];
-      sum_y += recon_left[0];
-      sum_xy += ref_left[0] * recon_left[0];
-      sum_xx += ref_left[0] * ref_left[0];
+#if CONFIG_2D_SR_BAWP_FIX
+	  if (sf->y_scale_fp != REF_NO_SCALE) {
+		  for (int i = 0; i < bh; i++) {
+			  int ref_left_tmp_idx = sf->scale_value_y_gen(i, sf) * ref_stride;
+			  sum_x += ref_left[ref_left_tmp_idx];
+			  sum_y += recon_left[0];
+			  sum_xy += ref_left[ref_left_tmp_idx] * recon_left[0];
+			  sum_xx += ref_left[ref_left_tmp_idx] * ref_left[ref_left_tmp_idx];
+			  recon_left += rec_stride;
+		  }
+	  }
+	  else {
+		  for (int i = 0; i < bh; ++i) {
+			  sum_x += ref_left[0];
+			  sum_y += recon_left[0];
+			  sum_xy += ref_left[0] * recon_left[0];
+			  sum_xx += ref_left[0] * ref_left[0];
+			  recon_left += rec_stride;
+			  ref_left += ref_stride;
+		  }
+	  }
+#else
+	  for (int i = 0; i < bh; ++i) {
+		  sum_x += ref_left[0];
+		  sum_y += recon_left[0];
+		  sum_xy += ref_left[0] * recon_left[0];
+		  sum_xx += ref_left[0] * ref_left[0];
 
-      recon_left += rec_stride;
-      ref_left += ref_stride;
-    }
-    count += bh;
+		  recon_left += rec_stride;
+		  ref_left += ref_stride;
+	  }
+#endif
+      count += bh;
   }
 
   const int16_t shift = 8;  // maybe a smaller value can be used
@@ -2058,12 +2226,41 @@
     // the picture boundary limitation to be checked.
     struct macroblockd_plane *const pd = &xd->plane[plane];
     const int ref_stride = pd->pre[ref].stride;
-    uint16_t *ref_buf = pd->pre[ref].buf + y_off * ref_stride + x_off;
+
+#if CONFIG_2D_SR_BAWP_FIX
+	uint16_t *ref_buf = pd->pre[ref].buf + y_off * ref_stride + x_off;
+	  const struct scale_factors *sf = inter_pred_params->scale_factors;
+	  if (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE) {
+      //      ref_buf = pd->pre[ref].buf + (y_off * sf->y_scale_fp / REF_NO_SCALE) * ref_stride
+      //          + (x_off * sf->x_scale_fp / REF_NO_SCALE);
+      calc_subpel_params_func(&mbmi->mv[ref].as_mv, inter_pred_params, xd, mi_x,
+                              mi_y, ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+			  0, /* use_optflow_refinement */
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+			  mc_buf, &ref_buf, &subpel_params, &ref_stride);
+	  }
+#else
+	  uint16_t *ref_buf = pd->pre[ref].buf + y_off * ref_stride + x_off;
+#endif
+
     uint16_t *ref_top = ref_buf - BAWP_REF_LINES * ref_stride;
     uint16_t *ref_left = ref_buf - BAWP_REF_LINES;
 
+#if CONFIG_2D_SR_BAWP_FIX
+    if (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE) {
+      ref_top = ref_buf -
+                BAWP_REF_LINES * ref_stride * (sf->y_scale_fp / REF_NO_SCALE);
+      ref_left = ref_buf - BAWP_REF_LINES * (sf->x_scale_fp / REF_NO_SCALE);
+    }
+#endif
+
     derive_bawp_parameters(xd, recon_top, recon_left, recon_stride, ref_top,
-                           ref_left, ref_stride, ref, plane, ref_w, ref_h);
+#if CONFIG_2D_SR_BAWP_FIX
+                           ref_left, ref_stride, ref, plane, ref_w, ref_h, sf);
+#else
+						  ref_left, ref_stride, ref, plane, ref_w, ref_h);
+#endif
   }
 
   int16_t alpha = mbmi->bawp_alpha[plane][ref];
@@ -2173,6 +2370,9 @@
                             pre_x + x, pd->subsampling_x, pd->subsampling_y,
                             xd->bd, mi->use_intrabc[0], sf, &pre_buf,
                             this_mbmi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+      av1_init_phase_offset(&inter_pred_params, cm);
+#endif
       inter_pred_params.conv_params =
           get_conv_params_no_round(ref, plane, NULL, 0, is_compound, xd->bd);
 
@@ -2228,6 +2428,7 @@
 
     if (y0 > ref_area->pad_block.y0 && y0 < ref_area->pad_block.y1)
       ref_row += src_stride;
+
   } while (--b_h);
 }
 // check if padding is required during motion compensation
@@ -2289,12 +2490,21 @@
 // perform padding of the motion compensated block if requires.
 // Padding is performed if the motion compensated block is partially out of the
 // reference area.
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+static void refinemv_extend_mc_border(
+    const struct scale_factors *const sf, struct buf_2d *const pre_buf,
+    MV32 scaled_mv, PadBlock block, int subpel_x_mv, int subpel_y_mv,
+    int do_warp, int is_intrabc, uint16_t *paded_ref_buf,
+    int paded_ref_buf_stride, uint16_t **pre, int *src_stride,
+    const ReferenceArea *ref_area, InterPredParams *const inter_pred_params) {
+#else
 static void refinemv_extend_mc_border(
     const struct scale_factors *const sf, struct buf_2d *const pre_buf,
     MV32 scaled_mv, PadBlock block, int subpel_x_mv, int subpel_y_mv,
     int do_warp, int is_intrabc, uint16_t *paded_ref_buf,
     int paded_ref_buf_stride, uint16_t **pre, int *src_stride,
     const ReferenceArea *ref_area) {
+#endif
   int x_pad = 0, y_pad = 0;
   if (update_extend_mc_border_params(sf, pre_buf, scaled_mv, &block,
                                      subpel_x_mv, subpel_y_mv, do_warp,
@@ -2331,6 +2541,7 @@
 #endif  // CONFIG_OPTFLOW_REFINEMENT
                                 MV32 *scaled_mv, int *subpel_x_mv,
                                 int *subpel_y_mv) {
+                                	
   const struct scale_factors *sf = inter_pred_params->scale_factors;
   struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf;
 
@@ -2374,6 +2585,22 @@
     int pos_x = sf->scale_value_x(orig_pos_x, sf);
     pos_x += SCALE_EXTRA_OFF;
     pos_y += SCALE_EXTRA_OFF;
+    
+#if CONFIG_2D_SR_ZERO_PHASE
+    // TODO: Determine plane type from something other than ssx, ssy
+    if (sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += (ssx == 1) ? inter_pred_params->posx_offset[1]
+                          : inter_pred_params->posx_offset[0];
+    }
+    if (sf->y_scale_fp != REF_NO_SCALE) {
+      pos_y += (ssy == 1) ? inter_pred_params->posy_offset[1]
+                          : inter_pred_params->posy_offset[0];
+    }
+#elif CONFIG_2D_SR_MC_PHASE_FIX
+    if (ssx == 1 && sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += inter_pred_params->posx_offset[1];
+    }
+#endif    
 
     const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
     const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
@@ -2382,12 +2609,130 @@
     const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
     pos_y = clamp(pos_y, top, bottom);
     pos_x = clamp(pos_x, left, right);
-
+    
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      const MV mv_q4 = tip_clamp_mv_to_umv_border_sb(inter_pred_params, src_mv, bw, bh, use_optflow_refinement, inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+      int subbpel_pos_x = ((inter_pred_params->pix_col << SUBPEL_BITS) + mv_q4.col) << SCALE_EXTRA_BITS;
+      int subbpel_pos_y = ((inter_pred_params->pix_row << SUBPEL_BITS) + mv_q4.row) << SCALE_EXTRA_BITS;
+      subpel_params->subpel_x = subbpel_pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = subbpel_pos_y & SCALE_SUBPEL_MASK;
+    } else {
+      subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+    }
+#else    
     subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
     subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+#endif
+    
     subpel_params->xs = sf->x_step_q4;
     subpel_params->ys = sf->y_step_q4;
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      int scale = 0;
+      if (sf->x_scale_fp == REF_2x_SCALE) scale = 2;
+      if (sf->x_scale_fp == REF_3x_SCALE) scale = 3;
+      if (sf->x_scale_fp == REF_4x_SCALE) scale = 4;
+      if (sf->x_scale_fp == REF_6x_SCALE) scale = 6;
+      assert(scale != 0);
+      inter_pred_params->conv_params.stride_scale = scale;
+
+      int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+      int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+
+    const MV orig_mv_q4 = tip_clamp_mv_to_umv_border_sb(
+          inter_pred_params, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+          use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+          inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+
+      orig_pos_x += orig_mv_q4.col;
+      orig_pos_y += orig_mv_q4.row;
+
+      orig_pos_y =
+          clamp(((orig_pos_y >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale, top,
+                bottom);
+      orig_pos_x =
+          clamp(((orig_pos_x >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale, left,
+                right);
+
+
+      // Get reference block top left coordinate.
+      block->x0 = orig_pos_x >> SCALE_SUBPEL_BITS;
+      block->y0 = orig_pos_y >> SCALE_SUBPEL_BITS;
+
+      //// Get reference block bottom right coordinate.
+      //block->x1 = ((orig_pos_x +
+      //              (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+      //     SCALE_SUBPEL_BITS) +
+      //    scale;
+      //block->y1 = ((orig_pos_y + (inter_pred_params->block_height - 1) *
+      //                          subpel_params->ys) >>
+      //             SCALE_SUBPEL_BITS) +
+      //            scale;
+
+#if CONFIG_D071_IMP_MSK_BLD
+      //block->x1 = orig_pos_x + (inter_pred_params->block_width) * scale;
+      //block->y1 = orig_pos_y + (inter_pred_params->block_height) * scale;
+      block->x1 =
+          ((orig_pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+           SCALE_SUBPEL_BITS) +
+          scale;
+      block->y1 = ((orig_pos_y + (inter_pred_params->block_height - 1) *
+                                subpel_params->ys) >>
+                   SCALE_SUBPEL_BITS) +
+                  scale;
+#else
+      //block->x1 = orig_pos_x + bw * scale;
+      //block->y1 = orig_pos_y + bh * scale;
+      block->x1 =
+          ((orig_pos_x + (bw - 1) * subpel_params->xs) >> SCALE_SUBPEL_BITS) + 1;
+      block->y1 =
+          ((orig_pos_y + (bh - 1) * subpel_params->ys) >> SCALE_SUBPEL_BITS) + 1;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+    } else {
+      inter_pred_params->conv_params.stride_scale = 1;
+
+      // Get reference block top left coordinate.
+      block->x0 = pos_x >> SCALE_SUBPEL_BITS;
+      block->y0 = pos_y >> SCALE_SUBPEL_BITS;
+
+#if CONFIG_D071_IMP_MSK_BLD
+      block->x1 =
+          ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+           SCALE_SUBPEL_BITS) +
+          1;
+      block->y1 = ((pos_y + (inter_pred_params->block_height - 1) *
+                                subpel_params->ys) >>
+                   SCALE_SUBPEL_BITS) +
+                  1;
+#else
+      // Get reference block bottom right coordinate.
+      block->x1 =
+          ((pos_x + (bw - 1) * subpel_params->xs) >> SCALE_SUBPEL_BITS) + 1;
+      block->y1 =
+          ((pos_y + (bh - 1) * subpel_params->ys) >> SCALE_SUBPEL_BITS) + 1;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+    }
+    MV temp_mv;
+    temp_mv = tip_clamp_mv_to_umv_border_sb(inter_pred_params, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+                                            use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                                            inter_pred_params->subsampling_x,
+                                            inter_pred_params->subsampling_y);
+
+      *scaled_mv = av1_scale_mv(&temp_mv, mi_x, mi_y, sf);
+      scaled_mv->row += SCALE_EXTRA_OFF;
+      scaled_mv->col += SCALE_EXTRA_OFF;
+
+      *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
+      *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+#else
     // Get reference block top left coordinate.
     block->x0 = pos_x >> SCALE_SUBPEL_BITS;
     block->y0 = pos_y >> SCALE_SUBPEL_BITS;
@@ -2422,6 +2767,7 @@
 
     *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
     *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+#endif    
   } else {
     // Get block position in current frame.
     int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
@@ -2497,12 +2843,21 @@
 
   const int paded_ref_buf_stride =
       inter_pred_params->ref_area->paded_ref_buf_stride;
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+  refinemv_extend_mc_border(
+      inter_pred_params->scale_factors, &inter_pred_params->ref_frame_buf,
+      scaled_mv, block, subpel_x_mv, subpel_y_mv,
+      inter_pred_params->mode == WARP_PRED, inter_pred_params->is_intrabc,
+      &inter_pred_params->ref_area->paded_ref_buf[0], paded_ref_buf_stride, pre,
+      src_stride, inter_pred_params->ref_area, inter_pred_params);
+#else      
   refinemv_extend_mc_border(
       inter_pred_params->scale_factors, &inter_pred_params->ref_frame_buf,
       scaled_mv, block, subpel_x_mv, subpel_y_mv,
       inter_pred_params->mode == WARP_PRED, inter_pred_params->is_intrabc,
       &inter_pred_params->ref_area->paded_ref_buf[0], paded_ref_buf_stride, pre,
       src_stride, inter_pred_params->ref_area);
+#endif      
 }
 #endif
 
@@ -2516,6 +2871,7 @@
 #endif  // CONFIG_OPTFLOW_REFINEMENT
                             MV32 *scaled_mv, int *subpel_x_mv,
                             int *subpel_y_mv) {
+                          	
   const struct scale_factors *sf = inter_pred_params->scale_factors;
   struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf;
 
@@ -2560,6 +2916,22 @@
     int pos_x = sf->scale_value_x(orig_pos_x, sf);
     pos_x += SCALE_EXTRA_OFF;
     pos_y += SCALE_EXTRA_OFF;
+    
+#if CONFIG_2D_SR_ZERO_PHASE
+    // TODO: Determine plane type from something other than ssx, ssy
+    if (sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += (ssx == 1) ? inter_pred_params->posx_offset[1]
+                          : inter_pred_params->posx_offset[0];
+    }
+    if (sf->y_scale_fp != REF_NO_SCALE) {
+      pos_y += (ssy == 1) ? inter_pred_params->posy_offset[1]
+                          : inter_pred_params->posy_offset[0];
+    }
+#elif CONFIG_2D_SR_MC_PHASE_FIX
+    if (ssx == 1 && sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += inter_pred_params->posx_offset[1];
+    }
+#endif    
 
     const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
     const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
@@ -2569,11 +2941,96 @@
     pos_y = clamp(pos_y, top, bottom);
     pos_x = clamp(pos_x, left, right);
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, src_mv, bw, bh, use_optflow_refinement, inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+      int subbpel_pos_x = ((inter_pred_params->pix_col << SUBPEL_BITS) + mv_q4.col) << SCALE_EXTRA_BITS;
+      int subbpel_pos_y = ((inter_pred_params->pix_row << SUBPEL_BITS) + mv_q4.row) << SCALE_EXTRA_BITS;
+      subpel_params->subpel_x = subbpel_pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = subbpel_pos_y & SCALE_SUBPEL_MASK;
+    } else {
+      subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+    }
+#else
     subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
     subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+#endif
+    
     subpel_params->xs = sf->x_step_q4;
     subpel_params->ys = sf->y_step_q4;
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      int scale = 0;
+      if (sf->x_scale_fp == REF_2x_SCALE) scale = 2;
+      if (sf->x_scale_fp == REF_3x_SCALE) scale = 3;
+      if (sf->x_scale_fp == REF_4x_SCALE) scale = 4;
+      if (sf->x_scale_fp == REF_6x_SCALE) scale = 6;
+      assert(scale != 0);
+
+      inter_pred_params->conv_params.stride_scale = scale;
+
+      int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+      int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+
+      const MV orig_mv_q4 = clamp_mv_to_umv_border_sb(
+          xd, src_mv, bw, bh, use_optflow_refinement,
+          inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+
+      orig_pos_x += orig_mv_q4.col;
+      orig_pos_y += orig_mv_q4.row;
+
+      orig_pos_y = clamp(((orig_pos_y >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale,
+                         top, bottom);
+      orig_pos_x = clamp(((orig_pos_x >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale,
+                         left, right);
+
+      // Get reference block top left coordinate.
+      block->x0 = orig_pos_x >> SCALE_SUBPEL_BITS;
+      block->y0 = orig_pos_y >> SCALE_SUBPEL_BITS;
+
+      // Get reference block bottom right coordinate.
+      block->x1 = ((orig_pos_x +
+                    (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+                   SCALE_SUBPEL_BITS) +
+                  scale;
+      block->y1 = ((orig_pos_y + (inter_pred_params->block_height - 1) *
+                                     subpel_params->ys) >>
+                   SCALE_SUBPEL_BITS) +
+                  scale;
+    } else {
+      inter_pred_params->conv_params.stride_scale = 1;
+
+      // Get reference block top left coordinate.
+      block->x0 = pos_x >> SCALE_SUBPEL_BITS;
+      block->y0 = pos_y >> SCALE_SUBPEL_BITS;
+
+      // Get reference block bottom right coordinate.
+      block->x1 =
+          ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+           SCALE_SUBPEL_BITS) +
+          1;
+      block->y1 = ((pos_y + (inter_pred_params->block_height - 1) *
+                                subpel_params->ys) >>
+                   SCALE_SUBPEL_BITS) +
+                  1;
+    }
+
+    MV temp_mv;
+    temp_mv = clamp_mv_to_umv_border_sb(xd, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+                                        use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                                        inter_pred_params->subsampling_x,
+                                        inter_pred_params->subsampling_y);
+    *scaled_mv = av1_scale_mv(&temp_mv, mi_x, mi_y, sf);
+    scaled_mv->row += SCALE_EXTRA_OFF;
+    scaled_mv->col += SCALE_EXTRA_OFF;
+
+    *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
+    *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+#else
     // Get reference block top left coordinate.
     block->x0 = pos_x >> SCALE_SUBPEL_BITS;
     block->y0 = pos_y >> SCALE_SUBPEL_BITS;
@@ -2601,6 +3058,7 @@
 
     *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
     *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+#endif    
   } else {
     // Get block position in current frame.
     int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
@@ -2671,12 +3129,21 @@
   // printf(" Use ref padding \n");
   const int paded_ref_buf_stride =
       inter_pred_params->ref_area->paded_ref_buf_stride;
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT      
+  refinemv_extend_mc_border(
+      inter_pred_params->scale_factors, &inter_pred_params->ref_frame_buf,
+      scaled_mv, block, subpel_x_mv, subpel_y_mv,
+      inter_pred_params->mode == WARP_PRED, inter_pred_params->is_intrabc,
+      &inter_pred_params->ref_area->paded_ref_buf[0], paded_ref_buf_stride, pre,
+      src_stride, inter_pred_params->ref_area, inter_pred_params);
+#else
   refinemv_extend_mc_border(
       inter_pred_params->scale_factors, &inter_pred_params->ref_frame_buf,
       scaled_mv, block, subpel_x_mv, subpel_y_mv,
       inter_pred_params->mode == WARP_PRED, inter_pred_params->is_intrabc,
       &inter_pred_params->ref_area->paded_ref_buf[0], paded_ref_buf_stride, pre,
       src_stride, inter_pred_params->ref_area);
+#endif      
 }
 
 static void get_ref_area_info(const MV *const src_mv,
@@ -2767,6 +3234,9 @@
                           pd->subsampling_x, pd->subsampling_y, xd->bd,
                           mi->use_intrabc[0], sf, pre_buf,
                           is_tip ? MULTITAP_SHARP : mi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
 
     inter_pred_params.original_pu_width = bw;
     inter_pred_params.original_pu_height = bh;
@@ -2858,6 +3328,9 @@
     av1_init_inter_params(&inter_pred_params[ref], bw, bh, pre_y, pre_x,
                           pd->subsampling_x, pd->subsampling_y, xd->bd,
                           mi->use_intrabc[0], sf, pre_buf, BILINEAR);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params[ref], cm);
+#endif
 
 #if CONFIG_REFINEMV
     inter_pred_params[ref].original_pu_width = pu_width;
@@ -3178,6 +3651,9 @@
     av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
                           pd->subsampling_x, pd->subsampling_y, xd->bd,
                           mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
 
 #if CONFIG_REFINEMV
     inter_pred_params.use_ref_padding = 1;
@@ -3446,10 +3922,17 @@
 
   if (use_optflow_refinement && plane == 0) {
     // Allocate gradient and dst buffers
-    gx0 = aom_memalign(32, 2 * MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*gx0));
-    gx1 = aom_memalign(32, 2 * MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*gx1));
-    gy0 = gx0 + (MAX_SB_SIZE * MAX_SB_SIZE);
-    gy1 = gx1 + (MAX_SB_SIZE * MAX_SB_SIZE);
+#if CONFIG_2D_SR_SCALE_EXT
+	  gx0 = aom_memalign(32, 36 * MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*gx0));
+	  gx1 = aom_memalign(32, 36 * MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*gx1));
+	  gy0 = gx0 + (MAX_SB_SIZE * MAX_SB_SIZE);
+	  gy1 = gx1 + (MAX_SB_SIZE * MAX_SB_SIZE);
+#else  // CONFIG_2D_SR_SCALE_EXT
+	  gx0 = aom_memalign(32, 2 * MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*gx0));
+	  gx1 = aom_memalign(32, 2 * MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*gx1));
+	  gy0 = gx0 + (MAX_SB_SIZE * MAX_SB_SIZE);
+	  gy1 = gx1 + (MAX_SB_SIZE * MAX_SB_SIZE);
+#endif  // CONFIG_2D_SR_SCALE_EXT 
 
     // Initialize refined mv
 #if CONFIG_REFINEMV
@@ -3465,8 +3948,13 @@
     }
     // Refine MV using optical flow. The final output MV will be in 1/16
     // precision.
-    dst0 = aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t));
-    dst1 = aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t));
+#if CONFIG_2D_SR_SCALE_EXT
+	dst0 = aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t));
+	dst1 = aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t));
+#else  // CONFIG_2D_SR_SCALE_EXT
+	dst0 = aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t));
+	dst1 = aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t));
+#endif  // CONFIG_2D_SR_SCALE_EXT
     av1_get_optflow_based_mv_highbd(cm, xd, plane, mi, mv_refined, bw, bh, mi_x,
                                     mi_y, mc_buf, calc_subpel_params_func, gx0,
                                     gy0, gx1, gy1, vx0, vy0, vx1, vy1, dst0,
@@ -3506,6 +3994,9 @@
     av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
                           pd->subsampling_x, pd->subsampling_y, xd->bd,
                           mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
     if (is_compound) av1_init_comp_mode(&inter_pred_params);
 #if CONFIG_D071_IMP_MSK_BLD
     inter_pred_params.border_data.enable_bacp = use_bacp;
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 5a5a146..7f08a56 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -162,6 +162,7 @@
 
 extern const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL];
 
+#if !CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
 typedef struct SubpelParams {
   int xs;
   int ys;
@@ -175,6 +176,7 @@
 #endif     // CONFIG_D071_IMP_MSK_BLD
 
 } SubpelParams;
+#endif
 
 struct build_prediction_ctxt {
   const AV1_COMMON *cm;
@@ -259,6 +261,15 @@
 #if CONFIG_D071_IMP_MSK_BLD
   INTERINTER_COMPOUND_BORDER_DATA border_data;
 #endif  // CONFIG_D071_IMP_MSK_BLD
+
+#if CONFIG_2D_SR_MC_PHASE_FIX
+  int posx_offset[2];
+  int posy_offset[2];
+#endif
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+  int mi_rows;
+  int mi_cols;
+#endif
 } InterPredParams;
 
 #if CONFIG_OPTFLOW_REFINEMENT
@@ -330,6 +341,10 @@
                            int is_intrabc, const struct scale_factors *sf,
                            const struct buf_2d *ref_buf,
                            InterpFilter interp_filter);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+void av1_init_phase_offset(InterPredParams *inter_pred_params,
+                           const AV1_COMMON *const cm);
+#endif
 
 #if CONFIG_WARP_REF_LIST
 // Check if the signaling of the warp delta parameters are allowed
diff --git a/av1/common/resize.c b/av1/common/resize.c
index 3b61abc..dec0dd4 100644
--- a/av1/common/resize.c
+++ b/av1/common/resize.c
@@ -24,6 +24,9 @@
 #include "aom_scale/aom_scale.h"
 #include "av1/common/common.h"
 #include "av1/common/resize.h"
+#if CONFIG_2D_SR
+#include "av1/common/lanczos_resample.h"
+#endif  // CONFIG_2D_SR
 
 #include "config/aom_dsp_rtcd.h"
 #include "config/aom_scale_rtcd.h"
@@ -985,6 +988,112 @@
   aom_free(arrbuf2);
 }
 
+#if CONFIG_2D_SR
+#define LANCZOS_A_NORMATIVE_HOR_Y 5  // Normative hor Lanczos a Luma
+#define LANCZOS_A_NORMATIVE_HOR_C 5  // Normative hor Lanczos a Chroma
+#define LANCZOS_A_NORMATIVE_VER_Y 5  // Normative ver Lanczos a Luma
+#define LANCZOS_A_NORMATIVE_VER_C 5  // Normative ver Lanczos a Chroma
+
+#define LANCZOS_A_NONNORMATIVE_HOR_Y 5  // Non-normative hor Lanczos a Luma
+#define LANCZOS_A_NONNORMATIVE_HOR_C 5  // Non-normative hor Lanczos a Chroma
+#define LANCZOS_A_NONNORMATIVE_VER_Y 5  // Non-normative ver Lanczos a Luma
+#define LANCZOS_A_NONNORMATIVE_VER_C \
+  5  // Non-normative ver Lanczos a Chroma
+     // Chroma
+// Resample one plane from (width x height) to (width2 x height2) with a
+// separable rational Lanczos filter of ratio num:denom.
+//
+// input/in_stride   : source plane (uint16_t samples)
+// output/out_stride : destination plane
+// subx, suby        : chroma subsampling flags; they select the initial
+//                     filter phase for each direction
+// bd                : bit depth, used to build the clipping profile
+// lanczos_a_hor/ver : Lanczos 'a' (lobe count) per direction
+//
+// NOTE(review): on filter-generation failure this calls exit(1) from library
+// code; propagating an error would normally be preferred -- confirm intent.
+void av1_resample_plane_2d_lanczos(const uint16_t *const input, int height,
+                                   int width, int in_stride, uint16_t *output,
+                                   int height2, int width2, int out_stride,
+                                   int subx, int suby, int bd, int denom,
+                                   int num, int lanczos_a_hor,
+                                   int lanczos_a_ver) {
+  int coeff_prec_bits = 14;  // fixed-point precision of the filter taps
+  int extra_prec_bits = 2;   // extra intermediate precision for the 2D pass
+  WIN_TYPE win = WIN_LANCZOS;
+  EXT_TYPE ext = EXT_REPEAT;  // repeat-edge extension at plane borders
+  ClipProfile clip = { bd, 0 };
+  int horz_a = lanczos_a_hor;
+  int vert_a = lanczos_a_ver;
+#if CONFIG_2D_SR_ZERO_PHASE
+  // 'z' selects the zero-phase filter variant regardless of subsampling.
+  double horz_x0 = (double)('z');
+  double vert_x0 = (double)('z');
+#else
+  // Initial phase selector ('d' vs 'c') depends on chroma subsampling; the
+  // exact phase semantics are defined by the lanczos_resample module.
+  double horz_x0 = subx ? (double)('d') : (double)('c');
+  double vert_x0 = suby ? (double)('d') : (double)('c');
+#endif
+
+  RationalResampleFilter horz_rf;
+  RationalResampleFilter vert_rf;
+
+  if (!get_resample_filter(num, denom, horz_a, horz_x0, ext, win, subx,
+                           coeff_prec_bits, &horz_rf)) {
+    fprintf(stderr, "Cannot generate filter, exiting!\n");
+    exit(1);
+  }
+#if CONFIG_2D_SR_RESAMPLER_FIX && !CONFIG_2D_SR_ZERO_PHASE
+  // With the resampler fix, the vertical pass ignores the subsampling phase.
+  if (!get_resample_filter(num, denom, vert_a, vert_x0, ext, win, 0,
+#else
+  if (!get_resample_filter(num, denom, vert_a, vert_x0, ext, win, suby,
+#endif
+                           coeff_prec_bits, &vert_rf)) {
+    fprintf(stderr, "Cannot generate filter, exiting!\n");
+    exit(1);
+  }
+
+  // Buffers are reinterpreted as int16_t for the generic 2D resampler;
+  // assumes sample values fit in the signed 16-bit range -- TODO confirm.
+  av1_resample_2d((const int16_t *)input, width, height, in_stride, &horz_rf,
+                  &vert_rf, extra_prec_bits, &clip, (int16_t *)output, width2,
+                  height2, out_stride);
+}
+
+// Resize every plane of src into dst with the non-normative Lanczos filters
+// (ratio num:denom), then extend dst's frame borders.
+void av1_resize_lanczos_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+                                         YV12_BUFFER_CONFIG *dst, int bd,
+                                         const int num_planes, const int subx,
+                                         const int suby, const int denom,
+                                         const int num) {
+  for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
+    const int is_uv = i > 0;
+    // Chroma planes use the chroma 'a' parameters and the subsampling phase;
+    // luma always uses phase flags of 0.
+    const int lanczos_a_hor =
+        is_uv ? LANCZOS_A_NONNORMATIVE_HOR_C : LANCZOS_A_NONNORMATIVE_HOR_Y;
+    const int lanczos_a_ver =
+        is_uv ? LANCZOS_A_NONNORMATIVE_VER_C : LANCZOS_A_NONNORMATIVE_VER_Y;
+    av1_resample_plane_2d_lanczos(
+        src->buffers[i], src->crop_heights[is_uv], src->crop_widths[is_uv],
+        src->strides[is_uv], dst->buffers[i], dst->crop_heights[is_uv],
+        dst->crop_widths[is_uv], dst->strides[is_uv], is_uv ? subx : 0,
+        is_uv ? suby : 0, bd, denom, num, lanczos_a_hor, lanczos_a_ver);
+  }
+  aom_extend_frame_borders(dst, num_planes);
+
+}
+
+// Measures the luma SSE introduced by a Lanczos down-scale (by denom:num,
+// non-normative filters) followed by an up-scale back to the original size
+// (normative filters). Only the Y plane is processed and compared.
+int64_t av1_downup_lanczos_sse(const YV12_BUFFER_CONFIG *src, int bd, int denom,
+                               int num) {
+  const int width = src->crop_widths[0];
+  const int height = src->crop_heights[0];
+
+  int width2 = width, height2 = height;
+  av1_calculate_scaled_superres_size(&width2, &height2, denom, num);
+  // NOTE(review): aom_malloc result is not NULL-checked before use below.
+  uint16_t *down = (uint16_t *)aom_malloc(sizeof(*down) * width2 * height2);
+  int down_stride = width2;
+
+  YV12_BUFFER_CONFIG outbuf;
+  memset(&outbuf, 0, sizeof(outbuf));
+  // NOTE(review): aom_alloc_frame_buffer return value is ignored; on failure
+  // outbuf.y_buffer would be NULL -- confirm callers guarantee success.
+  aom_alloc_frame_buffer(&outbuf, width, height, src->subsampling_x,
+                         src->subsampling_y, 0, 32);
+  // Down-scale luma by denom:num.
+  av1_resample_plane_2d_lanczos(
+      src->y_buffer, height, width, src->y_stride, down, height2, width2,
+      down_stride, src->subsampling_x, src->subsampling_y, bd, denom, num,
+      LANCZOS_A_NONNORMATIVE_HOR_Y, LANCZOS_A_NONNORMATIVE_VER_Y);
+  // Up-scale back by num:denom into a full-size temporary frame.
+  av1_resample_plane_2d_lanczos(
+      down, height2, width2, down_stride, outbuf.y_buffer, height, width,
+      outbuf.y_stride, src->subsampling_x, src->subsampling_y, bd, num, denom,
+      LANCZOS_A_NORMATIVE_HOR_Y, LANCZOS_A_NORMATIVE_VER_Y);
+  int64_t sse = aom_highbd_get_y_sse(src, &outbuf);
+  aom_free(down);
+  aom_free_frame_buffer(&outbuf);
+  return sse;
+}
+#endif  // CONFIG_2D_SR
+
 static void highbd_upscale_normative_rect(const uint16_t *const input,
                                           int height, int width, int in_stride,
                                           uint16_t *output, int height2,
@@ -1184,6 +1293,65 @@
   aom_extend_frame_borders(dst, num_planes);
 }
 
+#if CONFIG_2D_SR
+// Normative 2D upscale of one plane: scales (width x height) up to
+// (width2 x height2) with the normative Lanczos-5 filters. The scaling
+// ratio is cm->superres_scale_denominator : SCALE_NUMERATOR in both
+// directions; plane selects luma (0) vs chroma (>0) phase handling.
+//
+// NOTE(review): on filter-generation failure this calls exit(1); library
+// code would normally propagate an error instead -- confirm intent.
+void av1_upscale_normative_2d(const AV1_COMMON *cm, const uint16_t *src,
+                              int height, int width, int src_stride,
+                              uint16_t *dst, int height2, int width2,
+                              int dst_stride, int plane) {
+  const int denom_h = cm->superres_scale_denominator;
+  const int denom_v = cm->superres_scale_denominator;
+
+  const int is_uv = (plane > 0);
+  const int ss_x = is_uv && cm->seq_params.subsampling_x;
+  const int ss_y = is_uv && cm->seq_params.subsampling_y;
+
+  const int coeff_prec_bits = 14;  // fixed-point precision of filter taps
+  const int extra_prec_bits = 2;   // extra intermediate precision
+  const WIN_TYPE win = WIN_LANCZOS;
+  const EXT_TYPE ext = EXT_REPEAT;  // repeat-edge extension at borders
+  ClipProfile clip = { (int)cm->seq_params.bit_depth, 0 };
+  const int horz_a = 5;  // normative Lanczos 'a' (lobes), both directions
+  const int vert_a = 5;
+#if CONFIG_2D_SR_ZERO_PHASE
+  // 'z' selects the zero-phase filter variant.
+  double horz_x0 = (double)('z');
+  double vert_x0 = (double)('z');
+#else
+  // Initial phase selector depends on chroma subsampling per direction.
+  double horz_x0 = ss_x ? (double)('d') : (double)('c');
+  double vert_x0 = ss_y ? (double)('d') : (double)('c');
+#endif
+
+  RationalResampleFilter horz_rf;
+  RationalResampleFilter vert_rf;
+
+  if (!get_resample_filter(denom_h, SCALE_NUMERATOR, horz_a, horz_x0, ext, win,
+                           ss_x, coeff_prec_bits, &horz_rf)) {
+    fprintf(stderr, "Cannot generate filter, exiting!\n");
+    exit(1);
+  }
+
+  if (!get_resample_filter(denom_v, SCALE_NUMERATOR, vert_a, vert_x0, ext, win,
+#if CONFIG_2D_SR_RESAMPLER_FIX && !CONFIG_2D_SR_ZERO_PHASE
+                           0,  // vertical pass ignores the subsampling phase
+#else
+                           ss_y,
+#endif
+                           coeff_prec_bits, &vert_rf)) {
+    fprintf(stderr, "Cannot generate filter, exiting!\n");
+    exit(1);
+  }
+
+  // Fix: dst is the buffer av1_resample_2d writes into, so it must be passed
+  // as a non-const int16_t* (the previous (const int16_t *)dst cast wrongly
+  // const-qualified the output argument).
+  av1_resample_2d((const int16_t *)src, width, height, src_stride, &horz_rf,
+                  &vert_rf, extra_prec_bits, &clip, (int16_t *)dst, width2,
+                  height2, dst_stride);
+}
+#endif  // CONFIG_2D_SR
 void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint16_t *src,
                                 int src_stride, uint16_t *dst, int dst_stride,
                                 int plane, int rows) {
@@ -1239,6 +1407,22 @@
   }
 }
 
+#if CONFIG_2D_SR
+// Upscale every plane of src into dst using the normative 2D upscaler, then
+// extend dst's frame borders.
+void av1_upscale_normative_2d_and_extend_frame(const AV1_COMMON *cm,
+                                               const YV12_BUFFER_CONFIG *src,
+                                               YV12_BUFFER_CONFIG *dst) {
+  const int num_planes = av1_num_planes(cm);
+  for (int plane = 0; plane < num_planes; ++plane) {
+    const int is_uv = (plane > 0);
+    av1_upscale_normative_2d(cm, src->buffers[plane], src->crop_heights[is_uv],
+                             src->crop_widths[is_uv], src->strides[is_uv],
+                             dst->buffers[plane], dst->crop_heights[is_uv],
+                             dst->crop_widths[is_uv], dst->strides[is_uv],
+                             plane);
+  }
+  aom_extend_frame_borders(dst, num_planes);
+}
+#else   // CONFIG_2D_SR
 void av1_upscale_normative_and_extend_frame(const AV1_COMMON *cm,
                                             const YV12_BUFFER_CONFIG *src,
                                             YV12_BUFFER_CONFIG *dst) {
@@ -1252,6 +1436,7 @@
 
   aom_extend_frame_borders(dst, num_planes);
 }
+#endif  // CONFIG_2D_SR
 
 YV12_BUFFER_CONFIG *av1_scale_if_required(
     AV1_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled,
@@ -1268,18 +1453,61 @@
 
   if (scaling_required) {
     const int num_planes = av1_num_planes(cm);
+#if CONFIG_2D_SR
+    if (cm->superres_scale_denominator > cm->superres_scale_numerator) {
+      av1_resize_lanczos_and_extend_frame(
+          unscaled, scaled, (int)cm->seq_params.bit_depth, num_planes,
+          unscaled->subsampling_x, unscaled->subsampling_y,
+          cm->superres_scale_denominator, cm->superres_scale_numerator);
+    } else {
+#endif  // CONFIG_2D_SR
     if (use_optimized_scaler && cm->seq_params.bit_depth == AOM_BITS_8) {
       av1_resize_and_extend_frame(unscaled, scaled, filter, phase, num_planes);
     } else {
       av1_resize_and_extend_frame_nonnormative(
           unscaled, scaled, (int)cm->seq_params.bit_depth, num_planes);
+	}
+#if CONFIG_2D_SR
     }
+#endif  // CONFIG_2D_SR
     return scaled;
   } else {
     return unscaled;
   }
 }
 
+#if CONFIG_2D_SR
+// Calculates the scaled dimension given the original dimension and the
+// num:denom scaling ratio, rounding to the nearest integer.
+static void calculate_scaled_size_helper(int *dim, int denom, int num) {
+  if (denom != num) {
+    // We need to ensure the constraint in "Appendix A" of the spec:
+    // * FrameWidth is greater than or equal to 16
+    // * FrameHeight is greater than or equal to 16
+    // For this, we clamp the downscaled dimension to at least 16. One
+    // exception: if original dimension itself was < 16, then we keep the
+    // downscaled dimension to be same as the original, to ensure that resizing
+    // is valid.
+    const int min_dim = AOMMIN(16, *dim);
+    // Alternative (kept for reference): use this version if *dim must be even
+    // *width = (*width * SCALE_NUMERATOR + denom) / (2 * denom);
+    // *width <<= 1;
+    *dim = (*dim * num + denom / 2) / (denom);  // round-to-nearest scaling
+    *dim = AOMMAX(*dim, min_dim);
+  }
+}
+
+// Apply the resize ratio SCALE_NUMERATOR:resize_denom to both frame
+// dimensions in place.
+void av1_calculate_scaled_size(int *width, int *height, int resize_denom) {
+  calculate_scaled_size_helper(height, resize_denom, SCALE_NUMERATOR);
+  calculate_scaled_size_helper(width, resize_denom, SCALE_NUMERATOR);
+}
+
+// Apply the superres ratio superres_num:superres_denom to both frame
+// dimensions in place (2D superres scales width and height alike).
+void av1_calculate_scaled_superres_size(int *width, int *height,
+                                        int superres_denom, int superres_num) {
+  calculate_scaled_size_helper(height, superres_denom, superres_num);
+  calculate_scaled_size_helper(width, superres_denom, superres_num);
+}
+#else   // CONFIG_2D_SR
 // Calculates the scaled dimension given the original dimension and the scale
 // denominator.
 static void calculate_scaled_size_helper(int *dim, int denom) {
@@ -1310,6 +1538,7 @@
   (void)height;
   calculate_scaled_size_helper(width, superres_denom);
 }
+#endif  // CONFIG_2D_SR
 
 void av1_calculate_unscaled_superres_size(int *width, int *height, int denom) {
   if (denom != SCALE_NUMERATOR) {
@@ -1346,18 +1575,30 @@
 
   YV12_BUFFER_CONFIG *const frame_to_show = &cm->cur_frame->buf;
 
+#if CONFIG_2D_SR
+  if (aom_alloc_frame_buffer(
+          &copy_buffer, cm->width, cm->height, seq_params->subsampling_x,
+          seq_params->subsampling_y, AOM_BORDER_IN_PIXELS, byte_alignment))
+    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+                       "Failed to alloc copy buffer for 2D-superres upscaling");
+#else   // CONFIG_2D_SR
   const int aligned_width = ALIGN_POWER_OF_TWO(cm->width, 3);
   if (aom_alloc_frame_buffer(
           &copy_buffer, aligned_width, cm->height, seq_params->subsampling_x,
           seq_params->subsampling_y, AOM_BORDER_IN_PIXELS, byte_alignment))
     aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
                        "Failed to allocate copy buffer for superres upscaling");
+#endif  // CONFIG_2D_SR
 
   // Copy function assumes the frames are the same size.
   // Note that it does not copy YV12_BUFFER_CONFIG config data.
   aom_yv12_copy_frame(frame_to_show, &copy_buffer, num_planes);
 
+#if CONFIG_2D_SR
+  assert(copy_buffer.y_crop_width == cm->width);
+#else   // CONFIG_2D_SR
   assert(copy_buffer.y_crop_width == aligned_width);
+#endif  // CONFIG_2D_SR
   assert(copy_buffer.y_crop_height == cm->height);
 
   // Realloc the current frame buffer at a higher resolution in place.
@@ -1414,7 +1655,11 @@
 
   // Scale up and back into frame_to_show.
   assert(frame_to_show->y_crop_width != cm->width);
+#if CONFIG_2D_SR
+  av1_upscale_normative_2d_and_extend_frame(cm, &copy_buffer, frame_to_show);
+#else   // CONFIG_2D_SR
   av1_upscale_normative_and_extend_frame(cm, &copy_buffer, frame_to_show);
+#endif  // CONFIG_2D_SR
 
   // Free the copy buffer
   aom_free_frame_buffer(&copy_buffer);
diff --git a/av1/common/resize.h b/av1/common/resize.h
index 71f4680..f00cc58 100644
--- a/av1/common/resize.h
+++ b/av1/common/resize.h
@@ -21,6 +21,37 @@
 extern "C" {
 #endif
 
+// 2D superres: rational scale factors and Lanczos resize/upscale helpers.
+#if CONFIG_2D_SR
+typedef struct {
+  uint8_t scale_num;
+  uint8_t scale_denom;
+} ScaleFactor;
+#if CONFIG_2D_SR_SCALE_EXT
+static const ScaleFactor superres_scales[SUPERRES_SCALES] = {
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+  { 4, 6 }, { 4, 8 }, { 4, 12 }, { 4, 16 }, { 4, 24 }, { 4, 5 }, { 4, 7 }, { 4, 10 }  // Currently 1.5X, 2X, 3X, 4X, 6X used
+#else
+  { 4, 5 }, { 4, 6 }, { 4, 7 }, { 4, 8 }, { 4, 10 }, { 4, 12 }, { 4, 16 }, { 4, 24 }
+#endif  // CONFIG_2D_SR_FRAME_WISE_SWITCHING
+};
+#else  // CONFIG_2D_SR_SCALE_EXT
+static const ScaleFactor superres_scales[SUPERRES_SCALES] = {
+  { 8, 10 }, { 8, 12 }, { 8, 14 }, { 8, 16 }
+};
+#endif  // CONFIG_2D_SR_SCALE_EXT
+
+void av1_resize_lanczos_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+                                         YV12_BUFFER_CONFIG *dst, int bd,
+                                         const int num_planes, const int subx,
+                                         const int suby, const int denom,
+                                         const int num);
+void av1_upscale_normative_2d_and_extend_frame(const AV1_COMMON *cm,
+                                               const YV12_BUFFER_CONFIG *src,
+                                               YV12_BUFFER_CONFIG *dst);
+int64_t av1_downup_lanczos_sse(const YV12_BUFFER_CONFIG *src, int bd, int denom,
+                               int num);
+#endif  // CONFIG_2D_SR
 void av1_resize_plane(const uint8_t *const input, int height, int width,
                       int in_stride, uint8_t *output, int height2, int width2,
                       int out_stride);
@@ -88,10 +119,17 @@
 // resize scale denominator.
 void av1_calculate_scaled_size(int *width, int *height, int resize_denom);
 
+#if CONFIG_2D_SR
+// Similar to above, but calculates scaled dimensions after superres from the
+// given original dimensions and the superres scale numerator/denominator.
+void av1_calculate_scaled_superres_size(int *width, int *height,
+                                        int superres_denom, int superres_num);
+#else   // CONFIG_2D_SR
 // Similar to above, but calculates scaled dimensions after superres from the
 // given original dimensions and superres scale denominator.
 void av1_calculate_scaled_superres_size(int *width, int *height,
                                         int superres_denom);
+#endif  // CONFIG_2D_SR
 
 // Inverse of av1_calculate_scaled_superres_size() above: calculates the
 // original dimensions from the given scaled dimensions and the scale
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 2a91711..693ee60 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -549,9 +549,36 @@
 // maximum RU size is equal to RESTORATION_UNITSIZE_MAX
 // The setting here is also for encoder search.
 void set_restoration_unit_size(int width, int height, int sx, int sy,
+#if CONFIG_2D_SR_RESTORATION_FLEXIBLE_RU_SIZE_SCALE
+                               RestorationInfo *rst, uint8_t superres_scale_denominator) {
+#else
                                RestorationInfo *rst) {
+#endif
   int s = AOMMIN(sx, sy);
+#if CONFIG_2D_SR_RESTORATION_FLEXIBLE_RU_SIZE_SCALE
+  rst[0].max_restoration_unit_size = ((RESTORATION_UNITSIZE_MAX / 6) * ((int)(superres_scale_denominator / SCALE_NUMERATOR))) >> 0;
+  rst[0].min_restoration_unit_size = ((RESTORATION_UNITSIZE_MAX / 6) * ((int)(superres_scale_denominator / SCALE_NUMERATOR))) >> 2;
 
+  if(superres_scale_denominator != SCALE_NUMERATOR) {
+    rst[1].max_restoration_unit_size = rst[0].max_restoration_unit_size;
+    rst[1].min_restoration_unit_size = rst[0].min_restoration_unit_size; 
+  } else {
+    // For large resolution, the minimum RU size is set to
+    // (RESTORATION_UNITSIZE_MAX / 6) >> 1 to reduce the encode complexity.
+    if (width * height > 1920 * 1080 * 2)
+      rst[0].min_restoration_unit_size = (RESTORATION_UNITSIZE_MAX / 6) >> 1;
+
+    rst[1].max_restoration_unit_size = rst[0].max_restoration_unit_size >> s;
+    rst[1].min_restoration_unit_size = rst[0].min_restoration_unit_size >> s;
+  }
+  
+  rst[2].max_restoration_unit_size = rst[1].max_restoration_unit_size;
+  rst[2].min_restoration_unit_size = rst[1].min_restoration_unit_size;
+
+  rst[0].restoration_unit_size = rst[0].min_restoration_unit_size;
+  rst[1].restoration_unit_size = rst[1].min_restoration_unit_size;
+  rst[2].restoration_unit_size = rst[2].min_restoration_unit_size;
+#else
   rst[0].max_restoration_unit_size = RESTORATION_UNITSIZE_MAX >> 0;
   rst[0].min_restoration_unit_size = RESTORATION_UNITSIZE_MAX >> 2;
 
@@ -569,11 +596,17 @@
   rst[0].restoration_unit_size = rst[0].min_restoration_unit_size;
   rst[1].restoration_unit_size = rst[1].min_restoration_unit_size;
   rst[2].restoration_unit_size = rst[2].min_restoration_unit_size;
+#endif
 }
 #endif  // CONFIG_FLEXIBLE_RU_SIZE
 
 static void extend_frame_highbd(uint16_t *data, int width, int height,
                                 int stride, int border_horz, int border_vert) {
+
+#if 0
+  printf("\textend_frame_highbd: %d\n", height);
+#endif
+
   uint16_t *data_p;
   int i, j;
   for (i = 0; i < height; ++i) {
@@ -1415,11 +1448,18 @@
 // Initializes the accumulators.
 static void initialize_feature_accumulators(int feature_lead, int feature_lag,
                                             int tskip_lead, int tskip_lag,
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+                                            PcwienerBuffers *buffers, bool skip_acc_txskip_flag) {
+#else
                                             PcwienerBuffers *buffers) {
+#endif                                            
   av1_zero(buffers->directional_feature_accumulator);
   av1_zero(buffers->tskip_feature_accumulator);
   // Initialize accumulators on the leftmost portion of the line.
   init_directional_feature_accumulator(0, feature_lead, feature_lag, buffers);
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  if(!skip_acc_txskip_flag)
+#endif  
   init_tskip_feature_accumulator(0, tskip_lead, tskip_lag, buffers);
 }
 
@@ -1452,6 +1492,9 @@
           ROUND_POWER_OF_TWO_SIGNED(feature_vector[f], bit_depth_shift);
   }
   const int tskip_index = NUM_PC_WIENER_FEATURES;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  assert(buffers->tskip_feature_accumulator[accum_index] >= 0);
+#endif  
   feature_vector[tskip_index] =
       buffers->tskip_feature_accumulator[accum_index] *
       buffers->feature_normalizers[tskip_index];
@@ -1515,7 +1558,11 @@
   const int tskip_index = NUM_PC_WIENER_FEATURES;
   const int tskip = feature_vector[tskip_index];
 
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  assert(tskip >= 0 && tskip < 256);
+#else
   assert(tskip < 256);
+#endif
   for (int i = 0; i < NUM_PC_WIENER_FEATURES; ++i)
     assert(feature_vector[i] >= 0);
 
@@ -1548,7 +1595,11 @@
     uint8_t *wiener_class_id, int wiener_class_id_stride, bool is_uv,
     int bit_depth, bool classify_only,
     const int16_t (*pcwiener_filters_luma)[NUM_PC_WIENER_TAPS_LUMA],
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    const uint8_t *filter_selector, PcwienerBuffers *buffers, bool skip_acc_txskip_flag) {
+#else
     const uint8_t *filter_selector, PcwienerBuffers *buffers) {
+#endif    
   (void)is_uv;
   const bool skip_filtering = classify_only;
   assert(!is_uv);
@@ -1610,6 +1661,9 @@
         row - feature_lead, row, dgd, stride, width, feature_lead, feature_lag);
   }
   for (int row = 0; row < tskip_length - 1; ++row) {
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    if(!skip_acc_txskip_flag)
+#endif
     av1_fill_tskip_sum_buffer(row - tskip_lead, tskip, tskip_stride,
                               buffers->tskip_sum_buffer, width, height,
                               tskip_lead, tskip_lag, tskip_strict);
@@ -1621,7 +1675,9 @@
         buffers->feature_sum_buffers, buffers->feature_line_buffers,
         row_to_process, feature_length - 1, dgd, stride, width, feature_lead,
         feature_lag);
-
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    if(!skip_acc_txskip_flag)
+#endif
     av1_fill_tskip_sum_buffer(i + tskip_lag, tskip, tskip_stride,
                               buffers->tskip_sum_buffer, width, height,
                               tskip_lead, tskip_lag, tskip_strict);
@@ -1634,7 +1690,11 @@
     if (!skip_row_compute) {
       // Initialize accumulators on the leftmost portion of the line.
       initialize_feature_accumulators(feature_lead, feature_lag, tskip_lead,
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+                                      tskip_lag, buffers, skip_acc_txskip_flag);
+#else
                                       tskip_lag, buffers);
+#endif      
       // Fill accumulators for processing width.
       update_accumulators(feature_lead, feature_lag, tskip_lead, tskip_lag,
                           width, buffers);
@@ -1768,7 +1828,11 @@
         rui->tskip + (j >> MI_SIZE_LOG2), rui->tskip_stride,
         rui->wiener_class_id + (j >> MI_SIZE_LOG2), rui->wiener_class_id_stride,
         rui->plane != AOM_PLANE_Y, bit_depth, false, pcwiener_filters_luma,
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+        filter_selector, rui->pcwiener_buffers, rui->skip_acc_txskip_flag);
+#else
         filter_selector, rui->pcwiener_buffers);
+#endif        
   }
 }
 #endif  // CONFIG_PC_WIENER
@@ -2506,6 +2570,9 @@
       ctxt->wiener_class_id_stride;
   rsi->unit_info[rest_unit_idx].qindex_offset = ctxt->qindex_offset;
   rsi->unit_info[rest_unit_idx].wiener_class_id_restrict = -1;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  rsi->unit_info[rest_unit_idx].skip_acc_txskip_flag = rsi->skip_acc_txskip_flag;
+#endif  
 #endif  // CONFIG_PC_WIENER
 
   av1_loop_restoration_filter_unit(
@@ -2654,6 +2721,10 @@
     ctxt[plane].wiener_class_id = cm->mi_params.wiener_class_id[plane];
     ctxt[plane].wiener_class_id_stride =
         cm->mi_params.wiener_class_id_stride[plane];
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    cm->rst_info[plane].skip_acc_txskip_flag = (cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0; 
+    ctxt[plane].rsi->skip_acc_txskip_flag = cm->rst_info[plane].skip_acc_txskip_flag;
+#endif        
 #endif  // CONFIG_PC_WIENER
 
     av1_foreach_rest_unit_in_plane(cm, plane, lr_ctxt->on_rest_unit,
@@ -2672,7 +2743,6 @@
   const int num_planes = av1_num_planes(cm);
 
   AV1LrStruct *loop_rest_ctxt = (AV1LrStruct *)lr_ctxt;
-
   av1_loop_restoration_filter_frame_init(loop_rest_ctxt, frame, cm,
                                          optimized_lr, num_planes);
 
@@ -2900,13 +2970,30 @@
   //   MI_SIZE * m = N / D u
   //
   // from which we get u = D * MI_SIZE * m / N
+#if CONFIG_2D_SR_RESTORATION_BUG_FIX
+  const int mi_to_num_x = mi_size_x;
+  const int mi_to_num_y = mi_size_y;
+  const int denom_x = size;
+  const int denom_y = size;
+#else  // CONFIG_2D_SR_RESTORATION_BUG_FIX
   const int mi_to_num_x = av1_superres_scaled(cm)
                               ? mi_size_x * cm->superres_scale_denominator
                               : mi_size_x;
+#if CONFIG_2D_SR
+  const int mi_to_num_y = av1_superres_scaled(cm)
+                              ? mi_size_y * cm->superres_scale_denominator
+                              : mi_size_y;
+#else   // CONFIG_2D_SR
   const int mi_to_num_y = mi_size_y;
-  const int denom_x = av1_superres_scaled(cm) ? size * SCALE_NUMERATOR : size;
-  const int denom_y = size;
+#endif  // CONFIG_2D_SR
 
+  const int denom_x = av1_superres_scaled(cm) ? size * SCALE_NUMERATOR : size;
+#if CONFIG_2D_SR
+  const int denom_y = av1_superres_scaled(cm) ? size * SCALE_NUMERATOR : size;
+#else   // CONFIG_2D_SR
+  const int denom_y = size;
+#endif  // CONFIG_2D_SR
+#endif  // CONFIG_2D_SR_RESTORATION_BUG_FIX
   const int rnd_x = denom_x - 1;
   const int rnd_y = denom_y - 1;
 
@@ -2943,7 +3030,26 @@
   const int is_uv = plane > 0;
   const uint16_t *src_buf = frame->buffers[plane];
   const int src_stride = frame->strides[is_uv];
+#if CONFIG_2D_SR_RESTORATION_FIX
+  int row_ = row;
+  // NOTE: For now, just scale the row value down to the downscaled domain.
+  // This may not be the best way, but it works for now. Needs revisiting in
+  // the future.
+#if CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR  
+  if (0) {
+#else
+  if (av1_superres_scaled(cm)) {
+#endif
+    row_ = (row * cm->superres_scale_numerator +
+            cm->superres_scale_denominator / 2) /
+           cm->superres_scale_denominator;
+  }
+#endif  // CONFIG_2D_SR_RESTORATION_FIX
+#if CONFIG_2D_SR_RESTORATION_FIX
+  const uint16_t *src_rows = src_buf + row_ * src_stride;
+#else
   const uint16_t *src_rows = src_buf + row * src_stride;
+#endif
 
   uint16_t *bdry_buf = is_above ? boundaries->stripe_boundary_above
                                 : boundaries->stripe_boundary_below;
@@ -2958,12 +3064,20 @@
   // fetching 2 "below" rows we need to fetch one and duplicate it.
   // This is equivalent to clamping the sample locations against the crop border
   const int lines_to_save =
+#if CONFIG_2D_SR_RESTORATION_FIX
+      AOMMIN(RESTORATION_CTX_VERT, frame->crop_heights[is_uv] - row_);
+#else
       AOMMIN(RESTORATION_CTX_VERT, frame->crop_heights[is_uv] - row);
+#endif
   assert(lines_to_save == 1 || lines_to_save == 2);
 
   int upscaled_width;
   int line_bytes;
-  if (av1_superres_scaled(cm)) {
+#if CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR  
+  if (0) { 
+#else 
+  if (av1_superres_scaled(cm)) { 
+#endif    
     const int ss_x = is_uv && cm->seq_params.subsampling_x;
     upscaled_width = (cm->superres_upscaled_width + ss_x) >> ss_x;
     line_bytes = upscaled_width << 1;
@@ -2993,7 +3107,26 @@
   const int is_uv = plane > 0;
   const uint16_t *src_buf = frame->buffers[plane];
   const int src_stride = frame->strides[is_uv];
+#if CONFIG_2D_SR_RESTORATION_FIX
+  int row_ = row;
+  // NOTE: For now, just scale the row value down to the downscaled domain.
+  // This may not be the best way, but it works for now. Needs revisiting in
+  // the future.
+#if CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR  
+  if (0) {
+#else  
+  if (av1_superres_scaled(cm)) {
+#endif    
+    row_ = (row * cm->superres_scale_numerator +
+            cm->superres_scale_denominator / 2) /
+           cm->superres_scale_denominator;
+  }
+#endif  // CONFIG_2D_SR_RESTORATION_FIX
+#if CONFIG_2D_SR_RESTORATION_FIX
+  const uint16_t *src_rows = src_buf + row_ * src_stride;
+#else
   const uint16_t *src_rows = src_buf + row * src_stride;
+#endif
 
   uint16_t *bdry_buf = is_above ? boundaries->stripe_boundary_above
                                 : boundaries->stripe_boundary_below;
@@ -3037,8 +3170,12 @@
 
   RestorationStripeBoundaries *boundaries = &cm->rst_info[plane].boundaries;
 
+#if CONFIG_2D_SR_RESTORATION_FIX
+  const int plane_height = after_cdef ? ROUND_POWER_OF_TWO(cm->superres_upscaled_height, ss_y) : ROUND_POWER_OF_TWO(cm->height, ss_y);
+#else
   const int plane_height =
       ROUND_POWER_OF_TWO(cm->superres_upscaled_height, ss_y);
+#endif      
 
   int tile_stripe;
   for (tile_stripe = 0;; ++tile_stripe) {
@@ -3056,8 +3193,24 @@
     // can use deblocked pixels from adjacent tiles for context.
     const int use_deblock_above = (frame_stripe > 0);
     const int use_deblock_below = (y1 < plane_height);
+#if CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR
+    int save_deblock = 0;
+    int save_cdef = 0;
 
+    if(av1_superres_scaled(cm) && after_cdef) {
+      save_deblock = 1;
+      save_cdef = 1;    
+    }
+    else {
+      save_deblock = after_cdef ? 0 : 1;
+      save_cdef =  after_cdef ? 1 : 0;       
+    }
+#endif
+#if CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR
+    if (save_deblock) {
+#else
     if (!after_cdef) {
+#endif      
       // Save deblocked context where needed.
       if (use_deblock_above) {
         save_deblock_boundary_lines(frame, cm, plane, y0 - RESTORATION_CTX_VERT,
@@ -3067,7 +3220,12 @@
         save_deblock_boundary_lines(frame, cm, plane, y1, frame_stripe, 0,
                                     boundaries);
       }
-    } else {
+    }
+#if CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR
+    if (save_cdef) {
+#else     
+    else {
+#endif      
       // Save CDEF context where needed. Note that we need to save the CDEF
       // context for a particular boundary iff we *didn't* save deblocked
       // context for that boundary.
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index f66987d..ae12c43 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -83,9 +83,17 @@
     RESTORATION_PADDING))
 
 #if CONFIG_FLEXIBLE_RU_SIZE
+#if CONFIG_2D_SR_RESTORATION_FLEXIBLE_RU_SIZE_SCALE
+#define RESTORATION_UNITSIZE_MAX 512 * 6
+#else
 #define RESTORATION_UNITSIZE_MAX 512
+#endif
+#else  // CONFIG_FLEXIBLE_RU_SIZE
+#if CONFIG_2D_SR_RESTORATION_FLEXIBLE_RU_SIZE_SCALE
+#define RESTORATION_UNITSIZE_MAX 256 * 6
 #else
 #define RESTORATION_UNITSIZE_MAX 256
+#endif
 #endif  // CONFIG_FLEXIBLE_RU_SIZE
 #define RESTORATION_UNITPELS_HORZ_MAX \
   (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16)
@@ -351,6 +359,12 @@
    * Pointer to buffers for pcwiener computations.
    */
   PcwienerBuffers *pcwiener_buffers;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  /*!
+   * flag to skip accumulating txskip values
+   */
+  bool skip_acc_txskip_flag;
+#endif  
 #endif  // CONFIG_PC_WIENER
 #if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   /*!
@@ -490,6 +504,12 @@
    */
   int num_filter_classes;
 #endif  // CONFIG_WIENER_NONSEP
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  /*!
+   * flag to skip accumulating txskip values
+   */
+  bool skip_acc_txskip_flag;
+#endif
 } RestorationInfo;
 
 /*!\cond */
@@ -578,7 +598,11 @@
                                     RestorationLineBuffers *rlbs);
 
 typedef struct FilterFrameCtxt {
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  RestorationInfo *rsi;
+#else
   const RestorationInfo *rsi;
+#endif  
   int tile_stripe0;
   int ss_x, ss_y;
   int bit_depth;
@@ -599,6 +623,9 @@
   int qindex_offset;
   uint8_t *wiener_class_id;
   int wiener_class_id_stride;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  bool skip_acc_txskip_flag;
+#endif  
 #endif  // CONFIG_PC_WIENER
 } FilterFrameCtxt;
 
@@ -783,7 +810,11 @@
 
 #if CONFIG_FLEXIBLE_RU_SIZE
 void set_restoration_unit_size(int width, int height, int sx, int sy,
+#if CONFIG_2D_SR_RESTORATION_FLEXIBLE_RU_SIZE_SCALE
+                               RestorationInfo *rst, uint8_t superres_scale_denominator);
+#else
                                RestorationInfo *rst);
+#endif                               
 #endif  // CONFIG_FLEXIBLE_RU_SIZE
 /*!\endcond */
 
diff --git a/av1/common/scale.c b/av1/common/scale.c
index 4a290a5..30705eb 100644
--- a/av1/common/scale.c
+++ b/av1/common/scale.c
@@ -41,6 +41,22 @@
   return val * (1 << SCALE_EXTRA_BITS);
 }
 
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+static INLINE int scaled_x_gen(int val, const struct scale_factors *sf) {
+  const int64_t tval = (int64_t)val * sf->x_scale_fp;
+  return (int)ROUND_POWER_OF_TWO_SIGNED_64(tval, REF_SCALE_SHIFT);
+}
+
+static INLINE int scaled_y_gen(int val, const struct scale_factors *sf) {
+  const int64_t tval = (int64_t)val * sf->y_scale_fp;
+  return (int)ROUND_POWER_OF_TWO_SIGNED_64(tval, REF_SCALE_SHIFT);
+}
+
+static int unscaled_value_gen(int val, const struct scale_factors *sf) {
+  (void)sf;
+  return val;
+}
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
 static int get_fixed_point_scale_factor(int other_size, int this_size) {
   // Calculate scaling factor once for each reference frame
   // and use fixed point scaling factors in decoding and encoding routines.
@@ -72,6 +88,25 @@
     return;
   }
 
+#if CONFIG_2D_SR_LIMIT_SCALE_FACTORS
+  // Limit the scale factor for a factor of SCALE_FACTOR
+  if( (other_h != this_h) || (other_w != this_w) ) {
+    //assert(SCALE_NUMERATOR==4);
+    int tmp;
+    tmp = ( other_w * SCALE_NUMERATOR * 2 ) / this_w;
+    tmp = ( tmp + 1 ) >> 1;
+    other_w = tmp;
+    this_w = SCALE_NUMERATOR;
+
+    tmp = ( other_h * SCALE_NUMERATOR * 2 ) / this_h;
+    tmp = ( tmp + 1 ) >> 1;
+    other_h = tmp;
+    this_h = SCALE_NUMERATOR;
+    assert(other_w==6 || other_w==8 || other_w==12 || other_w==16 || other_w==24);
+    assert(other_h==6 || other_h==8 || other_h==12 || other_h==16 || other_h==24);
+  }
+#endif
+
   sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w);
   sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h);
 
@@ -81,8 +116,16 @@
   if (av1_is_scaled(sf)) {
     sf->scale_value_x = scaled_x;
     sf->scale_value_y = scaled_y;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    sf->scale_value_x_gen = scaled_x_gen;
+    sf->scale_value_y_gen = scaled_y_gen;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   } else {
     sf->scale_value_x = unscaled_value;
     sf->scale_value_y = unscaled_value;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    sf->scale_value_x_gen = unscaled_value_gen;
+    sf->scale_value_y_gen = unscaled_value_gen;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   }
 }
diff --git a/av1/common/scale.h b/av1/common/scale.h
index 1508a9e..f4a78fa 100644
--- a/av1/common/scale.h
+++ b/av1/common/scale.h
@@ -20,10 +20,18 @@
 extern "C" {
 #endif
 
-#define SCALE_NUMERATOR 8
+#if CONFIG_2D_SR_SCALE_EXT 
+#define SCALE_NUMERATOR 4
+#else  // CONFIG_2D_SR_SCALE_EXT
+#define SCALE_NUMERATOR 8   
+#endif  // CONFIG_2D_SR_SCALE_EXT 
 
 #define REF_SCALE_SHIFT 14
 #define REF_NO_SCALE (1 << REF_SCALE_SHIFT)
+#define REF_2x_SCALE (1 << (REF_SCALE_SHIFT + 1))
+#define REF_3x_SCALE (3 * (1 << REF_SCALE_SHIFT))
+#define REF_4x_SCALE (1 << (REF_SCALE_SHIFT + 2))
+#define REF_6x_SCALE (6 * (1 << REF_SCALE_SHIFT))
 #define REF_INVALID_SCALE -1
 
 struct scale_factors {
@@ -34,6 +42,11 @@
 
   int (*scale_value_x)(int val, const struct scale_factors *sf);
   int (*scale_value_y)(int val, const struct scale_factors *sf);
+
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  int (*scale_value_x_gen)(int val, const struct scale_factors *sf);
+  int (*scale_value_y_gen)(int val, const struct scale_factors *sf);
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
 };
 
 MV32 av1_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
@@ -55,7 +68,11 @@
 
 static INLINE int valid_ref_frame_size(int ref_width, int ref_height,
                                        int this_width, int this_height) {
-  return 2 * this_width >= ref_width && 2 * this_height >= ref_height &&
+#if CONFIG_2D_SR_SCALE_EXT 
+	return 7 * this_width > ref_width && 7 * this_height > ref_height &&
+#else  // CONFIG_2D_SR_SCALE_EXT
+	return 2 * this_width >= ref_width && 2 * this_height >= ref_height &&
+#endif  // CONFIG_2D_SR_SCALE_EXT 
          this_width <= 16 * ref_width && this_height <= 16 * ref_height;
 }
 
diff --git a/av1/common/tip.c b/av1/common/tip.c
index d8eb15d..f3374ef 100644
--- a/av1/common/tip.c
+++ b/av1/common/tip.c
@@ -129,9 +129,19 @@
 #else
   const int start_frame_order_hint = start_frame_buf->order_hint;
 #endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
-
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  const int is_scaled = (start_frame_buf->width != cm->width ||
+    start_frame_buf->height != cm->height);
+  struct scale_factors sf_;
+  // Inverse scale factor
+  av1_setup_scale_factors_for_frame(&sf_, cm->width, cm->height,
+    start_frame_buf->width,
+    start_frame_buf->height);
+  const struct scale_factors* sf = &sf_;
+#else
   assert(start_frame_buf->width == cm->width &&
          start_frame_buf->height == cm->height);
+#endif
 #if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   const int *const ref_order_hints = start_frame_buf->ref_display_order_hint;
   const int cur_order_hint = cm->cur_frame->display_order_hint;
@@ -158,7 +168,33 @@
   const int mvs_cols =
       ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, TMVP_SHIFT_BITS);
   const int mvs_stride = mvs_cols;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+  uint32_t scaled_blk_col_hr_0 = 0;
+  uint32_t scaled_blk_col_hr_step = 0;
+  uint32_t scaled_blk_col_hr = 0;
+  uint32_t scaled_blk_row_hr_0 = 0;
+  uint32_t scaled_blk_row_hr_step = 0;
+  uint32_t scaled_blk_row_hr = 0;
+  if (is_scaled) {
+    scaled_blk_col_hr_0 =
+        (uint32_t)sf->x_scale_fp * 4;  // center of first block
+    scaled_blk_col_hr_step = (uint32_t)sf->x_scale_fp * 8;  // step
+    scaled_blk_row_hr_0 =
+        (uint32_t)sf->y_scale_fp * 4;  // center of first block
+    scaled_blk_row_hr_step = (uint32_t)sf->y_scale_fp * 8;  // step
+    scaled_blk_row_hr = scaled_blk_row_hr_0;
+  }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   for (int blk_row = 0; blk_row < mvs_rows; ++blk_row) {
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    int scaled_blk_row = blk_row;
+    if (is_scaled) {
+      scaled_blk_col_hr = scaled_blk_col_hr_0;
+      scaled_blk_row =
+          ROUND_POWER_OF_TWO(scaled_blk_row_hr, REF_SCALE_SHIFT + 3);
+      scaled_blk_row = AOMMIN(scaled_blk_row, mvs_rows - 1);
+    }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     for (int blk_col = 0; blk_col < mvs_cols; ++blk_col) {
       const MV_REF *mv_ref = &mv_ref_base[blk_row * mvs_stride + blk_col];
       MV_REFERENCE_FRAME ref_frame[2] = { mv_ref->ref_frame[0],
@@ -168,6 +204,16 @@
           const int ref_frame_order_hint = ref_order_hints[ref_frame[idx]];
           if (ref_frame_order_hint == target_order_hint) {
             MV ref_mv = mv_ref->mv[idx].as_mv;
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+            int scaled_blk_col = blk_col;
+            if (is_scaled) {
+              scaled_blk_col =
+                  ROUND_POWER_OF_TWO(scaled_blk_col_hr, REF_SCALE_SHIFT + 3);
+              scaled_blk_col = AOMMIN(scaled_blk_col, mvs_cols - 1);
+              ref_mv.row = sf->scale_value_y_gen(ref_mv.row, sf);
+              ref_mv.col = sf->scale_value_x_gen(ref_mv.col, sf);
+            }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
             int_mv this_mv;
             int mi_r = 0;
             int mi_c = 0;
@@ -180,7 +226,6 @@
                 ref_mv.row = -ref_mv.row;
                 ref_mv.col = -ref_mv.col;
               }
-
               const int mi_offset = mi_r * mvs_stride + mi_c;
               if (tpl_mvs_base[mi_offset].mfmv0.as_int == INVALID_MV) {
                 tpl_mvs_base[mi_offset].mfmv0.as_mv.row = ref_mv.row;
@@ -191,7 +236,13 @@
           }
         }
       }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+      if (is_scaled) scaled_blk_col_hr += scaled_blk_col_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
     }
+#if CONFIG_ACROSS_SCALE_TPL_MVS
+    if (is_scaled) scaled_blk_row_hr += scaled_blk_row_hr_step;
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
   }
 
   return 1;
@@ -590,10 +641,26 @@
   const int is_scaled = has_scale(subpel_params->xs, subpel_params->ys);
   assert(conv_params->dst != NULL);
   if (is_scaled) {
+// Note when CONFIG_2D_SR_STRIDED_CONV_SPEED is enabled, we can use the
+// accelerated functions and so do not have to force the use of av1_highbd_convolve_2d_scale_c
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if (conv_params->stride_scale == 1) {
+      av1_highbd_convolve_2d_scale(
+          src, src_stride, dst, dst_stride, w, h, interp_filters[0],
+          interp_filters[1], subpel_params->subpel_x, subpel_params->xs,
+          subpel_params->subpel_y, subpel_params->ys, conv_params, bd);
+    } else {
+      av1_highbd_convolve_2d_scale_strided(
+          src, src_stride, dst, dst_stride, w, h, interp_filters[0],
+          interp_filters[1], subpel_params->subpel_x, subpel_params->xs,
+          subpel_params->subpel_y, subpel_params->ys, conv_params, bd);
+    }
+#else
     av1_highbd_convolve_2d_scale(
         src, src_stride, dst, dst_stride, w, h, interp_filters[0],
         interp_filters[1], subpel_params->subpel_x, subpel_params->xs,
         subpel_params->subpel_y, subpel_params->ys, conv_params, bd);
+#endif
   } else {
     revert_scale_extra_bits(subpel_params);
     tip_highbd_convolve_2d_facade_compound(src, src_stride, dst, dst_stride, w,
@@ -937,6 +1004,9 @@
     av1_init_inter_params(&inter_pred_params, comp_bw, comp_bh, comp_pixel_y,
                           comp_pixel_x, ss_x, ss_y, bd, 0, sf, pred_buf,
                           MULTITAP_SHARP);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
 
 #if CONFIG_REFINEMV
     if (apply_refinemv) {
@@ -1078,6 +1148,9 @@
     av1_init_inter_params(&inter_pred_params, comp_bw, comp_bh, comp_pixel_y,
                           comp_pixel_x, ss_x, ss_y, bd, 0, sf, pred_buf,
                           MULTITAP_SHARP);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
 
     inter_pred_params.comp_mode = UNIFORM_COMP;
 
@@ -1257,6 +1330,20 @@
       ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, TMVP_SHIFT_BITS);
   tip_setup_tip_frame_planes(cm, xd, 0, 0, mvs_rows, mvs_cols, mvs_cols, mc_buf,
                              tmp_conv_dst, calc_subpel_params_func);
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  // TODO: Selectively skip upsampling if PEF is to be executed, since PEF would perform the upsampling.
+  // Possible condition to check for PEF: if (cm->seq_params.enable_pef && cm->features.allow_pef) {
+  if (av1_superres_scaled(cm)) {
+    // Upscale tip_frame and store in upsampled_tip_frame_buf
+#if CONFIG_2D_SR
+    av1_upscale_normative_2d_and_extend_frame(
+        cm, &cm->tip_ref.tip_frame->buf, &cm->tip_ref.upscaled_tip_frame_buf);
+#else
+    av1_upscale_normative_and_extend_frame(cm, &cm->tip_ref.tip_frame->buf,
+                                           &cm->tip_ref.upscaled_tip_frame_buf);
+#endif  // CONFIG_2D_SR
+  }
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 }
 
 static void tip_extend_plane_block_based_highbd(
diff --git a/av1/common/tip.h b/av1/common/tip.h
index d5c50d6..12177d9 100644
--- a/av1/common/tip.h
+++ b/av1/common/tip.h
@@ -20,6 +20,9 @@
 #include "av1/common/av1_common_int.h"
 #include "av1/common/mvref_common.h"
 #include "av1/common/reconinter.h"
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+#include "av1/common/resize.h"
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 
 #if CONFIG_OPTFLOW_ON_TIP
 #define TIP_RD_CORRECTION 100000
@@ -88,6 +91,18 @@
   } else if (fullmv.col + (blk_col << TMVP_MI_SZ_LOG2) < 0) {
     fullmv.col = -(blk_col << TMVP_MI_SZ_LOG2);
   }
+#if 1  // CONFIG_ACROSS_SCALE_TPL_MVS
+  const int mvs_rows =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, TMVP_SHIFT_BITS);
+  const int mvs_cols =
+      ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, TMVP_SHIFT_BITS);
+  if ((fullmv.row >> TMVP_MI_SZ_LOG2) + blk_row >= mvs_rows) {
+    fullmv.row = ((mvs_rows - blk_row) << TMVP_MI_SZ_LOG2) - 1;
+  }
+  if ((fullmv.col >> TMVP_MI_SZ_LOG2) + blk_col >= mvs_cols) {
+    fullmv.col = ((mvs_cols - blk_col) << TMVP_MI_SZ_LOG2) - 1;
+  }
+#endif  // CONFIG_ACROSS_SCALE_TPL_MVS
 
   return fullmv;
 }
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 6fa6b52..46f0988 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -437,13 +437,21 @@
     leads to a maximum value of about 282 * 2^k after applying the offset.
     So in that case we still need to clamp.
 */
+
 void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
                               int width, int height, int stride, uint16_t *pred,
                               int p_col, int p_row, int p_width, int p_height,
                               int p_stride, int subsampling_x,
                               int subsampling_y, int bd,
                               ConvolveParams *conv_params, int16_t alpha,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                              int16_t beta, int16_t gamma, int16_t delta,
+                              const int x_step_qn, const int y_step_qn) {
+#else
                               int16_t beta, int16_t gamma, int16_t delta) {
+#endif
+//  printf("C");
+
   int32_t tmp[15 * 8];
   const int reduce_bits_horiz =
       conv_params->round_0 +
@@ -461,6 +469,36 @@
   (void)max_bits_horiz;
   assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
 
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+  // Determine our stride
+  assert( (x_step_qn >> SCALE_SUBPEL_BITS) == (y_step_qn >> SCALE_SUBPEL_BITS) );
+  const int x_conv_stride = x_step_qn >> SCALE_SUBPEL_BITS;
+  const int y_conv_stride = y_step_qn >> SCALE_SUBPEL_BITS;
+
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+  static uint16_t * buffer = NULL;
+  static int buffer_width = -1;
+  const int mode_1_5x_flag = ( x_step_qn + ( 1 << (SCALE_SUBPEL_BITS - 2 ) ) ) >> (SCALE_SUBPEL_BITS -1 ) == 3 ? 1 : 0;
+
+  if( mode_1_5x_flag && buffer_width < width ) {
+    if(buffer==NULL)
+      buffer = malloc( width * sizeof(uint16_t) );
+    else
+      buffer = realloc( buffer, width * sizeof(uint16_t) );
+
+    buffer_width = width;
+  }
+#endif
+
+  // Determine the width of the image if it was converted to the lower resolution
+  // and then back to the current resolution.  Note that when mode_1_5x_flag is
+  // false, the width is equal to x_conv_stride * (width/x_conv_stride - 1).  This
+  // is written as width - width%x_conv_stride - x_conv_stride in the warp functions.
+  const int width_strided_minus_1 = mode_1_5x_flag ? 3*(2*width/3 - 1)/2:
+                                       width - width%x_conv_stride - x_conv_stride;
+#endif
+
+
   for (int i = p_row; i < p_row + p_height; i += 8) {
     for (int j = p_col; j < p_col + p_width; j += 8) {
       // Calculate the center of this 8x8 block,
@@ -471,11 +509,13 @@
       const int32_t src_y = (i + 4) << subsampling_y;
       const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
       const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
+
       const int32_t x4 = dst_x >> subsampling_x;
       const int32_t y4 = dst_y >> subsampling_y;
 
       const int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
       int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
+
       const int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
       int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
 
@@ -487,11 +527,63 @@
 
       // Horizontal filter
       for (int k = -7; k < 8; ++k) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+        const int iy = clamp(x_conv_stride * (iy4 + k), 0, height - 1);
+#else
         const int iy = clamp(iy4 + k, 0, height - 1);
+#endif
+
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+
+        // Create an intermediate input buffer when the scale factor is 1.5x.
+        if( mode_1_5x_flag ) {
+          // Determine the line in the reference image that corresponds to the desired iy
+          const int src_iy = clamp(3 * (iy4 + k) / 2, 0, height - 1);
+
+          // Determine if we are interpolating vertically.
+          // Note: a modulo-3 value of 1 indicates interpolation; it arises
+          // from the integer division used to compute src_iy above.  A value
+          // of 2 should only occur when the clamping limits are applied.
+          // In practice, this happens at the bottom of the frame.  We disable
+          // interpolation in that case (via the src_iy < height - 1 check).
+          const int interp_vertical = ( (src_iy % 3 == 1) && (src_iy < (height-1)) ) ? 1 : 0;
+
+          // Create the buffer
+          if (interp_vertical) {
+            const uint16_t *ptr0 = &(ref[src_iy * stride]);
+            const uint16_t *ptr1 = &(ref[(src_iy + 1) * stride]);
+
+            for (int ix = ix4 - 7; ix < ix4 + 8; ix++) {
+              int src_ix = clamp(3 * ix / 2, 0, width_strided_minus_1);
+              int dst_ix = clamp( ix, 0, width - 1);
+
+              if (src_ix % 3 == 1){
+                buffer[dst_ix] = (ptr0[src_ix] + ptr0[src_ix + 1]
+                                  + ptr1[src_ix] + ptr1[src_ix + 1] + 2) >> 2;
+              } else
+                buffer[dst_ix] = (ptr0[src_ix] + ptr1[src_ix] + 1) >> 1;
+            }
+          } else {
+            const uint16_t *ptr = &(ref[src_iy * stride]);
+
+            for (int ix = ix4 - 7; ix < ix4 + 8; ix++) {
+              int src_ix = clamp(3 * ix / 2, 0, width_strided_minus_1);
+              int dst_ix = clamp( ix, 0, width - 1);
+
+              if (src_ix % 3 == 1) {
+                buffer[dst_ix] = (ptr[src_ix] + ptr[src_ix + 1] + 1) >> 1;
+              } else
+                buffer[dst_ix] = ptr[src_ix];
+            }
+          }
+        }
+#endif
 
         int sx = sx4 + beta * (k + 4);
+
         for (int l = -4; l < 4; ++l) {
           int ix = ix4 + l - 3;
+
           const int offs = ROUND_POWER_OF_TWO(sx, WARPEDDIFF_PREC_BITS) +
                            WARPEDPIXEL_PREC_SHIFTS;
           assert(offs >= 0 && offs <= WARPEDPIXEL_PREC_SHIFTS * 3);
@@ -499,8 +591,21 @@
 
           int32_t sum = 1 << offset_bits_horiz;
           for (int m = 0; m < 8; ++m) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+            const int sample_x = clamp(x_conv_stride * (ix + m), 0, width_strided_minus_1);
+#else
             const int sample_x = clamp(ix + m, 0, width - 1);
+#endif
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+            if( mode_1_5x_flag ) {
+              int sample_x = clamp( (ix+m), 0, width - 1);
+              sum += buffer[sample_x] * coeffs[m];
+            }
+            else
+              sum += ref[iy * stride + sample_x] * coeffs[m];
+#else
             sum += ref[iy * stride + sample_x] * coeffs[m];
+#endif
           }
           sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
           assert(0 <= sum && sum < (1 << max_bits_horiz));
@@ -566,7 +671,12 @@
                        int width, int height, int stride, uint16_t *const pred,
                        int p_col, int p_row, int p_width, int p_height,
                        int p_stride, int subsampling_x, int subsampling_y,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                       int bd, ConvolveParams *conv_params,
+                       const SubpelParams *subpel_params) {
+#else
                        int bd, ConvolveParams *conv_params) {
+#endif 
   assert(wm->wmtype <= AFFINE);
   if (wm->wmtype == ROTZOOM) {
     wm->wmmat[5] = wm->wmmat[2];
@@ -578,10 +688,27 @@
   const int16_t gamma = wm->gamma;
   const int16_t delta = wm->delta;
 
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+  if(subpel_params->xs != (1 << SCALE_SUBPEL_BITS)) {
+
+    av1_highbd_warp_affine_sse4_1(mat, ref, width, height, stride, pred, p_col, p_row,
+                                  p_width, p_height, p_stride, subsampling_x,
+                                  subsampling_y, bd, conv_params, alpha, beta, gamma,
+                                  delta, subpel_params->xs, subpel_params->ys);
+
+  }
+  else {
+    av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
+                           p_width, p_height, p_stride, subsampling_x,
+                           subsampling_y, bd, conv_params, alpha, beta, gamma,
+                           delta, subpel_params->xs, subpel_params->ys);
+  }
+#else
   av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row,
                          p_width, p_height, p_stride, subsampling_x,
                          subsampling_y, bd, conv_params, alpha, beta, gamma,
                          delta);
+#endif    
 }
 
 int64_t av1_calc_highbd_frame_error(const uint16_t *const ref, int stride,
@@ -643,10 +770,18 @@
                     int width, int height, int stride, uint16_t *pred,
                     int p_col, int p_row, int p_width, int p_height,
                     int p_stride, int subsampling_x, int subsampling_y,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                    ConvolveParams *conv_params, const SubpelParams *subpel_params) {
+#else
                     ConvolveParams *conv_params) {
+#endif 
   highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
                     p_height, p_stride, subsampling_x, subsampling_y, bd,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                    conv_params, subpel_params);
+#else
                     conv_params);
+#endif
 }
 
 #define LS_MV_MAX 256  // max mv in 1/8-pel
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index 9722b22..12b2326 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -263,11 +263,31 @@
                                     const uint16_t *const dst, int p_width,
                                     int p_height, int p_stride, int bd);
 
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+typedef struct SubpelParams {
+  int xs;
+  int ys;
+  int subpel_x;
+  int subpel_y;
+
+#if CONFIG_D071_IMP_MSK_BLD
+  int x0;  // top left sample horizontal coord.
+  int y0;  // top left sample vertical coord.
+  int x1;  // x0 + bw
+  int y1;  // y0 + bh
+#endif     // CONFIG_D071_IMP_MSK_BLD
+} SubpelParams;
+#endif
+
 void highbd_warp_plane(WarpedMotionParams *wm, const uint16_t *const ref,
                        int width, int height, int stride, uint16_t *const pred,
                        int p_col, int p_row, int p_width, int p_height,
                        int p_stride, int subsampling_x, int subsampling_y,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                       int bd, ConvolveParams *conv_params, const SubpelParams *subpel_params);
+#else
                        int bd, ConvolveParams *conv_params);
+#endif
 
 void warp_plane(WarpedMotionParams *wm, const uint8_t *const ref, int width,
                 int height, int stride, uint8_t *pred, int p_col, int p_row,
@@ -278,7 +298,11 @@
                     int width, int height, int stride, uint16_t *pred,
                     int p_col, int p_row, int p_width, int p_height,
                     int p_stride, int subsampling_x, int subsampling_y,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                    ConvolveParams *conv_params, const SubpelParams *subpel_params);
+#else
                     ConvolveParams *conv_params);
+#endif   
 
 int av1_find_projection(int np, const int *pts1, const int *pts2,
                         BLOCK_SIZE bsize, MV mv, WarpedMotionParams *wm_params,
diff --git a/av1/common/x86/av1_convolve_scale_sse4.c b/av1/common/x86/av1_convolve_scale_sse4.c
index 51553ff..c02a495 100644
--- a/av1/common/x86/av1_convolve_scale_sse4.c
+++ b/av1/common/x86/av1_convolve_scale_sse4.c
@@ -24,6 +24,277 @@
   return _mm_madd_epi16(data, coeff);
 }
 
+#if CONFIG_2D_SR_STRIDED_CONV_SPEED
+static __m128i strided_load_2x(const uint16_t *const src) {
+
+  __m128i control = _mm_setr_epi8(0, 1, 4, 5,
+                                  8, 9, 12, 13,
+                                  0x80, 0x80, 0x80, 0x80,
+                                  0x80, 0x80, 0x80, 0x80 );
+
+  // Load the first 8 values and de-interleave into the lowest 64 bits of data0
+  __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  __m128i data1 = _mm_shuffle_epi8( data0, control );
+
+  // Load the second 8 values and de-interleave in the lowest 64 bits of data2
+  __m128i data2 = _mm_loadu_si128((__m128i *)src+1 );
+  __m128i data3 = _mm_shuffle_epi8( data2, control );
+
+  // Combine the results
+  __m128i data4 = _mm_unpacklo_epi64( data1, data3);
+
+  return data4;
+
+}
+
+static __m128i strided_load_3x(const uint16_t *const src) {
+
+  //[*0 1 2 *3 4 5 *6 7][8 *9 10 11 *12 13 14 *15][16 17 *18 19 20 *21 22 23]
+
+    // Load data from the first 8 values and store in their correct location
+  const __m128i control0 = _mm_setr_epi8(0, 1, 6, 7,
+                                        12, 13, 0x80, 0x80,
+                                        0x80, 0x80, 0x80, 0x80,
+                                        0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  const __m128i data1 = _mm_shuffle_epi8( data0, control0 );
+
+  // Load data from the second 8 values and store in their correct location
+  const __m128i control1 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                        0x80, 0x80, 2, 3,
+                                        8, 9, 14, 15,
+                                        0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data2 = _mm_loadu_si128((__m128i *)src+1 );
+  const __m128i data3 = _mm_shuffle_epi8( data2, control1 );
+
+  // Load data from the third 8 values and store in their correct location
+  const __m128i control2 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         4, 5, 10, 11 );
+
+  const __m128i data4 = _mm_loadu_si128((__m128i *)src+2 );
+  const __m128i data5 = _mm_shuffle_epi8( data4, control2 );
+
+  // Combine the results
+  const __m128i data6 = _mm_blend_epi16(data1, data3, 0b00111000);
+  const __m128i data7 = _mm_blend_epi16(data6, data5, 0b11000000);
+
+  return data7;
+}
+
+static __m128i strided_load_4x(const uint16_t *const src) {
+
+  const __m128i control0 = _mm_setr_epi8(0, 1, 8, 9,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i control1 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         0, 1, 8, 9,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+
+  // Load the first 8 values and store the two desired values in the lowest 64 bits of data0
+  const __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  const __m128i data1 = _mm_shuffle_epi8( data0, control0 );
+
+  // Load the second 8 values and store the two desired values in the lowest 64 bits of data2
+  const __m128i data2 = _mm_loadu_si128((__m128i *)src+1 );
+  const __m128i data3 = _mm_shuffle_epi8( data2, control1 );
+
+  // Load the third 8 values and store the two desired values in the lowest 64 bits of data4
+  const __m128i data4 = _mm_loadu_si128((__m128i *)src+2 );
+  const __m128i data5 = _mm_shuffle_epi8( data4, control0 );
+
+  // Load the fourth 8 values and store the two desired values in the lowest 64 bits of data6
+  const __m128i data6 = _mm_loadu_si128((__m128i *)src+3 );
+  const __m128i data7 = _mm_shuffle_epi8( data6, control1 );
+
+  // Combine the results
+  /*
+  const __m128i data8 = _mm_blend_epi16(data1, data3, 0b00001100);
+  const __m128i data9 = _mm_blend_epi16(data5, data7, 0b00001100);
+  const __m128i data10 = _mm_unpacklo_epi64( (__m128i)data8, (__m128i)data9);
+  */
+  __m128i data8 = _mm_blend_epi16(data1, data3, 0b00001100);
+  __m128i data9 = _mm_blend_epi16(data5, data7, 0b00001100);
+  const __m128i data10 = _mm_unpacklo_epi64(data8, data9);
+
+  return data10;
+
+}
+
+static __m128i strided_load_6x(const uint16_t *const src) {
+
+  //[*0 1 2 3 4 5 *6 7][8 9 10 11 *12 13 14 15][16 17 *18 19 20 21 22 23]...
+
+  // Load data from the first 8 values and store in their correct location
+  const __m128i control0 = _mm_setr_epi8(0, 1, 12, 13,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  const __m128i data1 = _mm_shuffle_epi8( data0, control0 );
+
+  // Load data from the second 8 values and store in their correct location
+  const __m128i control1 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         8, 9, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data2 = _mm_loadu_si128((__m128i *)src+1 );
+  const __m128i data3 = _mm_shuffle_epi8( data2, control1 );
+
+  // Load data from the third 8 values and store in their correct location
+  const __m128i control2 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 4, 5,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data4 = _mm_loadu_si128((__m128i *)src+2 );
+  const __m128i data5 = _mm_shuffle_epi8( data4, control2 );
+
+  // Combine the results
+  const __m128i data6 = _mm_blend_epi16(data1, data3, 0b00000100);
+  const __m128i data7 = _mm_blend_epi16(data6, data5, 0b00001000);
+
+  // Load data from the fourth 8 values and store in the low bits
+  const __m128i data8 = _mm_shuffle_epi8( _mm_loadu_si128((__m128i *)src+3 ), control0 );
+
+  // Load data from the fifth 8 values and store in the low bits
+  const __m128i data9 = _mm_shuffle_epi8( _mm_loadu_si128((__m128i *)src+4 ), control1 );
+
+  // Load data from the sixth 8 values and store in the low bits
+  const __m128i data10 = _mm_shuffle_epi8( _mm_loadu_si128((__m128i *)src+5 ), control2 );
+
+  // Combine the results
+  const __m128i data11 = _mm_blend_epi16(data8, data9, 0b00000100);
+  const __m128i data12 = _mm_blend_epi16(data11, data10, 0b00001000);
+  const __m128i data13 = _mm_unpacklo_epi64( data7, data12);
+
+  return data13;
+
+}
+
+// A specialised version of hfilter, the horizontal filter for
+// av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
+// filters.
+static void highbd_hfilter8_strided(const uint16_t *src, int src_stride, int16_t *dst,
+                                    int w, int h, int subpel_x_qn, int x_step_qn,
+                                    const InterpFilterParams *filter_params,
+                                    unsigned round, int bd, int y_conv_stride) {
+
+
+  // Only tested for a sample factor of two (currently)
+  //assert(y_conv_stride == 2);
+  //assert(x_step_qn == (2 << SCALE_SUBPEL_BITS) );
+
+  // Determine the stride of the horizontal convolution
+  const int x_conv_stride = x_step_qn >> SCALE_SUBPEL_BITS;
+  assert( (x_conv_stride <= 6) && (x_conv_stride!=5) );
+
+  const int ntaps = 8;
+
+  src -= ( ntaps / 2 - 1 ) * x_conv_stride;
+
+  int32_t round_add32 = (1 << round) / 2 + (1 << (bd + FILTER_BITS - 1));
+  const __m128i round_add = _mm_set1_epi32(round_add32);
+  const __m128i round_shift = _mm_cvtsi32_si128(round);
+
+  int x_qn = subpel_x_qn;
+  for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
+      const uint16_t *src_col = src + (x_qn >> SCALE_SUBPEL_BITS);
+      const int filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
+
+      assert(filter_idx < SUBPEL_SHIFTS);
+      const int16_t *filter =
+          av1_get_interp_filter_subpel_kernel(filter_params, filter_idx);
+
+      // Load the filter coefficients
+      const __m128i coefflo = _mm_loadu_si128((__m128i *)filter);
+
+      int y;
+      for (y = 0; y <= h - 4; y += 4) {
+        const uint16_t *const src0 = src_col + y * src_stride * y_conv_stride;
+        const uint16_t *const src1 = src0 + 1 * src_stride * y_conv_stride;
+        const uint16_t *const src2 = src0 + 2 * src_stride * y_conv_stride;
+        const uint16_t *const src3 = src0 + 3 * src_stride * y_conv_stride;
+
+        // Load source data
+        __m128i data0lo, data1lo, data2lo, data3lo;
+
+        if( x_conv_stride==1 ) {
+          data0lo = _mm_loadu_si128((__m128i *)src0);
+          data1lo = _mm_loadu_si128((__m128i *)src1);
+          data2lo = _mm_loadu_si128((__m128i *)src2);
+          data3lo = _mm_loadu_si128((__m128i *)src3);
+        }
+        if( x_conv_stride==2 ) {
+          data0lo = strided_load_2x(src0);
+          data1lo = strided_load_2x(src1);
+          data2lo = strided_load_2x(src2);
+          data3lo = strided_load_2x(src3);
+        }
+        else if( x_conv_stride==3) {
+          data0lo = strided_load_3x(src0);
+          data1lo = strided_load_3x(src1);
+          data2lo = strided_load_3x(src2);
+          data3lo = strided_load_3x(src3);
+        }
+        else if( x_conv_stride==4) {
+          data0lo = strided_load_4x(src0);
+          data1lo = strided_load_4x(src1);
+          data2lo = strided_load_4x(src2);
+          data3lo = strided_load_4x(src3);
+        }
+        else if ( x_conv_stride==6 ) {
+          data0lo = strided_load_6x(src0);
+          data1lo = strided_load_6x(src1);
+          data2lo = strided_load_6x(src2);
+          data3lo = strided_load_6x(src3);
+        }
+
+        // Multiply by coefficients
+        const __m128i conv0lo = _mm_madd_epi16(data0lo, coefflo);
+        const __m128i conv1lo = _mm_madd_epi16(data1lo, coefflo);
+        const __m128i conv2lo = _mm_madd_epi16(data2lo, coefflo);
+        const __m128i conv3lo = _mm_madd_epi16(data3lo, coefflo);
+
+        // Reduce horizontally and add
+        const __m128i conv01lo = _mm_hadd_epi32(conv0lo, conv1lo);
+        const __m128i conv23lo = _mm_hadd_epi32(conv2lo, conv3lo);
+        const __m128i conv = _mm_hadd_epi32(conv01lo, conv23lo);
+
+        // Divide down by (1 << round), rounding to nearest.
+        __m128i shifted =
+            _mm_sra_epi32(_mm_add_epi32(conv, round_add), round_shift);
+
+        shifted = _mm_packus_epi32(shifted, shifted);
+
+        // Write transposed to the output
+        _mm_storel_epi64((__m128i *)(dst + y + x * h), shifted);
+
+      }
+
+      for (; y < h; ++y) {
+        const uint16_t *const src_row = src_col + y * src_stride * y_conv_stride;
+
+        int32_t sum = (1 << (bd + FILTER_BITS - 1));
+        for (int k = 0; k < ntaps; ++k) {
+          sum += filter[k] * src_row[k * x_conv_stride];
+        }
+
+        dst[y + x * h] = ROUND_POWER_OF_TWO(sum, round);
+      }
+    }
+}
+#endif
+
 // A specialised version of hfilter, the horizontal filter for
 // av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
 // filters.
@@ -95,6 +366,8 @@
     }
   }
 }
+
+
 // A specialised version of vfilter, the vertical filter for
 // av1_highbd_convolve_2d_scale_sse4_1. This version only supports 8 tap
 // filters.
@@ -245,8 +518,13 @@
     const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
     ConvolveParams *conv_params, int bd) {
   // TODO(yaowu): Move this out of stack
+#if CONFIG_2D_SR_SCALE_EXT
+  int16_t *tmp = (int16_t *)aom_memalign(2, (6 * MAX_SB_SIZE + MAX_FILTER_TAP) * (6 * MAX_SB_SIZE) * sizeof(int16_t));
+#else
   DECLARE_ALIGNED(16, int16_t,
                   tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
+#endif  // CONFIG_2D_SR_SCALE_EXT
+
   int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
              filter_params_y->taps;
   const int xtaps = filter_params_x->taps;
@@ -264,4 +542,67 @@
   // vertical filter (input is transposed)
   highbd_vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
                   filter_params_y, conv_params, bd);
+
+#if CONFIG_2D_SR_SCALE_EXT
+  aom_free(tmp);
+#endif
 }
+
+#if CONFIG_2D_SR_STRIDED_CONV_SPEED
+void av1_highbd_convolve_2d_scale_strided_sse4_1(
+    const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
+    int h, const InterpFilterParams *filter_params_x,
+    const InterpFilterParams *filter_params_y, const int subpel_x_qn,
+#if CONFIG_2D_SR_STRIDED_CONV_SPEED
+    const int x_step_qn, const int subpel_y_qn, int y_step_qn,
+#else
+    const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
+#endif
+    ConvolveParams *conv_params, int bd) {
+  // TODO(yaowu): Move this out of stack
+#if CONFIG_2D_SR_SCALE_EXT && !CONFIG_2D_SR_STRIDED_CONV_SPEED
+  //	DECLARE_ALIGNED(16, int16_t,
+  //	tmp[(6 * MAX_SB_SIZE + MAX_FILTER_TAP) * (6 * MAX_SB_SIZE)]);
+  int16_t *tmp = (int16_t *)aom_memalign(2, (6 * MAX_SB_SIZE + MAX_FILTER_TAP) * (6 * MAX_SB_SIZE) * sizeof(int16_t));
+#else  // CONFIG_2D_SR_SCALE_EXT
+  DECLARE_ALIGNED(16, int16_t,
+                  tmp[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
+#endif  // CONFIG_2D_SR_SCALE_EXT
+
+#if CONFIG_2D_SR_STRIDED_CONV_SPEED
+  int y_conv_stride = y_step_qn >> SCALE_SUBPEL_BITS;
+  y_step_qn = y_step_qn / y_conv_stride;
+  //assert(y_conv_stride==2);
+  assert(y_step_qn == ( 1 << SCALE_SUBPEL_BITS ) || y_step_qn == 1536 );
+#endif
+
+  int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
+             filter_params_y->taps;
+  const int xtaps = filter_params_x->taps;
+  const int ytaps = filter_params_y->taps;
+  const int fo_vert = ytaps / 2 - 1;
+
+  assert((xtaps == 8) && (ytaps == 8));
+  (void)xtaps;
+
+  // horizontal filter
+#if CONFIG_2D_SR_STRIDED_CONV_SPEED
+  highbd_hfilter8_strided(src - fo_vert * src_stride * y_conv_stride,
+                          src_stride, tmp, w, im_h,
+                          subpel_x_qn, x_step_qn, filter_params_x,
+                          conv_params->round_0, bd, y_conv_stride);
+#else
+  highbd_hfilter8(src - fo_vert * src_stride, src_stride, tmp, w, im_h,
+                  subpel_x_qn, x_step_qn, filter_params_x, conv_params->round_0,
+                  bd);
+#endif
+
+  // vertical filter (input is transposed)
+  highbd_vfilter8(tmp, im_h, dst, dst_stride, w, h, subpel_y_qn, y_step_qn,
+                  filter_params_y, conv_params, bd);
+
+#if CONFIG_2D_SR_SCALE_EXT && !CONFIG_2D_SR_STRIDED_CONV_SPEED
+  aom_free(tmp);
+#endif  
+}
+#endif
diff --git a/av1/common/x86/highbd_warp_affine_avx2.c b/av1/common/x86/highbd_warp_affine_avx2.c
index 4b0effc..b46f038 100644
--- a/av1/common/x86/highbd_warp_affine_avx2.c
+++ b/av1/common/x86/highbd_warp_affine_avx2.c
@@ -14,6 +14,9 @@
 #include "config/av1_rtcd.h"
 
 #include "av1/common/warped_motion.h"
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+#include "av1/encoder/rd.h"
+#endif 
 
 void av1_highbd_warp_affine_avx2(const int32_t *mat, const uint16_t *ref,
                                  int width, int height, int stride,
@@ -21,7 +24,17 @@
                                  int p_width, int p_height, int p_stride,
                                  int subsampling_x, int subsampling_y, int bd,
                                  ConvolveParams *conv_params, int16_t alpha,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                                 int16_t beta, int16_t gamma, int16_t delta,
+                                 const int x_step_qn, const int y_step_qn) {
+#else
                                  int16_t beta, int16_t gamma, int16_t delta) {
+#endif 
+
+//#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+//fprintf(g_log, "", );
+//#endif 
+
   __m256i tmp[15];
   const int reduce_bits_horiz =
       conv_params->round_0 +
@@ -63,6 +76,12 @@
   (void)mhoriz;
   int sx;
 
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+  // Determine our stride
+  assert(x_step_qn == y_step_qn);
+  const int x_conv_stride = x_step_qn >> SCALE_SUBPEL_BITS;
+#endif
+
   for (int i = 0; i < p_height; i += 8) {
     for (int j = 0; j < p_width; j += 8) {
       // Calculate the center of this 8x8 block,
@@ -76,7 +95,11 @@
       const int32_t x4 = dst_x >> subsampling_x;
       const int32_t y4 = dst_y >> subsampling_y;
 
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+      int16_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+#else
       const int16_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
+#endif
       int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
       const int16_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
       int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
@@ -92,7 +115,11 @@
       // Horizontal filter
       if (ix4 <= -7) {
         for (int k = -7; k < AOMMIN(8, p_height - i); ++k) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+          int iy = x_conv_stride * (iy4 + k);
+#else
           int iy = iy4 + k;
+#endif          
           if (iy < 0)
             iy = 0;
           else if (iy > height - 1)
@@ -103,7 +130,11 @@
         }
       } else if (ix4 >= width + 6) {
         for (int k = -7; k < AOMMIN(8, p_height - i); ++k) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+          int iy = x_conv_stride * (iy4 + k);
+#else
           int iy = iy4 + k;
+#endif          
           if (iy < 0)
             iy = 0;
           else if (iy > height - 1)
@@ -116,8 +147,11 @@
       } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) {
         int32_t tmp1[8];
         for (int k = -7; k < AOMMIN(8, p_height - i); ++k) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+          const int iy = clamp(x_conv_stride * (iy4 + k), 0, height - 1);
+#else
           const int iy = clamp(iy4 + k, 0, height - 1);
-
+#endif
           sx = sx4 + beta * (k + 4);
           for (int l = -4; l < 4; ++l) {
             int ix = ix4 + l - 3;
@@ -126,7 +160,11 @@
 
             int32_t sum = 1 << offset_bits_horiz;
             for (int m = 0; m < 8; ++m) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP            
+              const int sample_x = clamp(x_conv_stride * (ix + m), 0, width - 1);
+#else
               const int sample_x = clamp(ix + m, 0, width - 1);
+#endif                
               sum += ref[iy * stride + sample_x] * coeffs[m];
             }
             sum = ROUND_POWER_OF_TWO(sum, reduce_bits_horiz);
@@ -150,7 +188,12 @@
           __m256i v_c67 = _mm256_broadcastd_epi32(
               _mm_shuffle_epi32(v_01, 3));  // A7A6A7A6A7A6A7A6
           for (int k = -7; k < AOMMIN(8, p_height - i); ++k) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+            int iy = x_conv_stride * (iy4 + k);
+            ix4 = x_conv_stride * ix4;
+#else
             int iy = iy4 + k;
+#endif
             if (iy < 0)
               iy = 0;
             else if (iy > height - 1)
@@ -197,7 +240,12 @@
           }
         } else if (alpha == 0) {
           for (int k = -7; k < AOMMIN(8, p_height - i); ++k) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+            int iy = x_conv_stride * (iy4 + k);
+            ix4 = x_conv_stride * ix4;
+#else
             int iy = iy4 + k;
+#endif            
             if (iy < 0)
               iy = 0;
             else if (iy > height - 1)
@@ -325,7 +373,12 @@
               _mm256_unpackhi_epi64(v_c0123u, v_c4567u);  // H7H6 ... A7A6
 
           for (int k = -7; k < AOMMIN(8, p_height - i); ++k) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+            int iy = x_conv_stride * (iy4 + k);
+            ix4 = x_conv_stride * ix4;
+#else
             int iy = iy4 + k;
+#endif            
             if (iy < 0)
               iy = 0;
             else if (iy > height - 1)
@@ -374,7 +427,12 @@
 
         } else {
           for (int k = -7; k < AOMMIN(8, p_height - i); ++k) {
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+            int iy = x_conv_stride * (iy4 + k);
+            ix4 = x_conv_stride * ix4;
+#else
             int iy = iy4 + k;
+#endif           
             if (iy < 0)
               iy = 0;
             else if (iy > height - 1)
diff --git a/av1/common/x86/highbd_warp_plane_sse4.c b/av1/common/x86/highbd_warp_plane_sse4.c
index f228e47..1ad69ce 100644
--- a/av1/common/x86/highbd_warp_plane_sse4.c
+++ b/av1/common/x86/highbd_warp_plane_sse4.c
@@ -250,6 +250,7 @@
     const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
     int32_t sx4, int alpha, int beta, int p_height, int height, int i,
     const int offset_bits_horiz, const int reduce_bits_horiz) {
+
   int k;
   for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
     int iy = iy4 + k;
@@ -274,6 +275,7 @@
     const uint16_t *ref, __m128i *tmp, int stride, int32_t ix4, int32_t iy4,
     int32_t sx4, int alpha, int beta, int p_height, int height, int i,
     const int offset_bits_horiz, const int reduce_bits_horiz) {
+
   if (alpha == 0 && beta == 0)
     highbd_warp_horizontal_filter_alpha0_beta0(
         ref, tmp, stride, ix4, iy4, sx4, alpha, beta, p_height, height, i,
@@ -294,13 +296,255 @@
                                   reduce_bits_horiz);
 }
 
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+static __m128i strided_load_2x(const uint16_t *const src) {
+
+  __m128i control = _mm_setr_epi8(0, 1, 4, 5,
+                                  8, 9, 12, 13,
+                                  0x80, 0x80, 0x80, 0x80,
+                                  0x80, 0x80, 0x80, 0x80 );
+
+  // Load the first 8 values and de-interleave into the lowest 64 bits of data0
+  __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  __m128i data1 = _mm_shuffle_epi8( data0, control );
+
+  // Load the second 8 values and de-interleave into the lowest 64 bits of data2
+  __m128i data2 = _mm_loadu_si128((__m128i *)src+1 );
+  __m128i data3 = _mm_shuffle_epi8( data2, control );
+
+  // Combine the results: pack the two de-interleaved 64-bit halves
+  __m128i data4 = _mm_unpacklo_epi64( data1, data3);
+
+  return data4;
+
+}
+
+static __m128i strided_load_3x(const uint16_t *const src) {
+
+  // Kept samples (marked *) are every 3rd element across three 8-sample loads:
+  //[*0 1 2 *3 4 5 *6 7][8 *9 10 11 *12 13 14 *15][16 17 *18 19 20 *21 22 23]
+
+  // Load data from the first 8 values and store in their correct location
+  const __m128i control0 = _mm_setr_epi8(0, 1, 6, 7,
+                                         12, 13, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  const __m128i data1 = _mm_shuffle_epi8( data0, control0 );
+
+  // Load data from the second 8 values and store in their correct location
+  const __m128i control1 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 2, 3,
+                                         8, 9, 14, 15,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data2 = _mm_loadu_si128((__m128i *)src+1 );
+  const __m128i data3 = _mm_shuffle_epi8( data2, control1 );
+
+  // Load data from the third 8 values and store in their correct location
+  const __m128i control2 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         4, 5, 10, 11 );
+
+  const __m128i data4 = _mm_loadu_si128((__m128i *)src+2 );
+  const __m128i data5 = _mm_shuffle_epi8( data4, control2 );
+
+  // Combine the results
+  const __m128i data6 = _mm_blend_epi16(data1, data3, 0b00111000);
+  const __m128i data7 = _mm_blend_epi16(data6, data5, 0b11000000);
+
+  return data7;
+}
+
+static __m128i strided_load_4x(const uint16_t *const src) {
+
+  // Gather 8 uint16 samples taken every 4th element of src, i.e.
+  // src[0], src[4], ..., src[28], packed into one 128-bit register.
+  // Each of the four 128-bit loads below covers 8 source samples, of
+  // which 2 are kept; control0/control1 place them in alternating slots.
+  const __m128i control0 = _mm_setr_epi8(0, 1, 8, 9,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i control1 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         0, 1, 8, 9,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+
+  // Load the first 8 values and store the two desired values in the lowest 64 bits of data0
+  const __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  const __m128i data1 = _mm_shuffle_epi8( data0, control0 );
+
+  // Load the second 8 values and store the two desired values in the lowest 64 bits of data2
+  const __m128i data2 = _mm_loadu_si128((__m128i *)src+1 );
+  const __m128i data3 = _mm_shuffle_epi8( data2, control1 );
+
+  // Load the third 8 values and store the two desired values in the lowest 64 bits of data4
+  const __m128i data4 = _mm_loadu_si128((__m128i *)src+2 );
+  const __m128i data5 = _mm_shuffle_epi8( data4, control0 );
+
+  // Load the fourth 8 values and store the two desired values in the lowest 64 bits of data6
+  const __m128i data6 = _mm_loadu_si128((__m128i *)src+3 );
+  const __m128i data7 = _mm_shuffle_epi8( data6, control1 );
+
+  // Combine the results: blend each pair of shuffled registers into a
+  // 64-bit half, then pack the two halves into the final result.
+  const __m128i data8 = _mm_blend_epi16(data1, data3, 0b00001100);
+  const __m128i data9 = _mm_blend_epi16(data5, data7, 0b00001100);
+  const __m128i data10 = _mm_unpacklo_epi64(data8, data9);
+
+  return data10;
+
+}
+
+static __m128i strided_load_6x(const uint16_t *const src) {
+
+  // Kept samples (marked *) are every 6th element across six 8-sample loads:
+  //[*0 1 2 3 4 5 *6 7][8 9 10 11 *12 13 14 15][16 17 *18 19 20 21 22 23]...
+
+  // Load data from the first 8 values and store in their correct location
+  const __m128i control0 = _mm_setr_epi8(0, 1, 12, 13,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  const __m128i data1 = _mm_shuffle_epi8( data0, control0 );
+
+  // Load data from the second 8 values and store in their correct location
+  const __m128i control1 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         8, 9, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data2 = _mm_loadu_si128((__m128i *)src+1 );
+  const __m128i data3 = _mm_shuffle_epi8( data2, control1 );
+
+  // Load data from the third 8 values and store in their correct location
+  const __m128i control2 = _mm_setr_epi8(0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 4, 5,
+                                         0x80, 0x80, 0x80, 0x80,
+                                         0x80, 0x80, 0x80, 0x80 );
+
+  const __m128i data4 = _mm_loadu_si128((__m128i *)src+2 );
+  const __m128i data5 = _mm_shuffle_epi8( data4, control2 );
+
+  // Combine the results
+  const __m128i data6 = _mm_blend_epi16(data1, data3, 0b00000100);
+  const __m128i data7 = _mm_blend_epi16(data6, data5, 0b00001000);
+
+  // Load data from the fourth 8 values and store in the low bits
+  const __m128i data8 = _mm_shuffle_epi8( _mm_loadu_si128((__m128i *)src+3 ), control0 );
+
+  // Load data from the fifth 8 values and store in the low bits
+  const __m128i data9 = _mm_shuffle_epi8( _mm_loadu_si128((__m128i *)src+4 ), control1 );
+
+  // Load data from the sixth 8 values and store in the low bits
+  const __m128i data10 = _mm_shuffle_epi8( _mm_loadu_si128((__m128i *)src+5 ), control2 );
+
+  // Combine the results
+  const __m128i data11 = _mm_blend_epi16(data8, data9, 0b00000100);
+  const __m128i data12 = _mm_blend_epi16(data11, data10, 0b00001000);
+  const __m128i data13 = _mm_unpacklo_epi64( data7, data12);
+
+  return data13;
+
+}
+
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+//Note: This function is typically called twice to load a total
+//of 16 elements.  This would be more efficient to do in a single
+//function, as there is some overlap between the data we are reading
+//in the first and second call.
+static __m128i interpolated_load_1_5x(const uint16_t *const src,
+                                      int first_sample_in_subpel,
+                                      int line_is_subpel,
+                                      int stride) {
+
+  // Produce 8 output samples for a 1.5x-scaled reference line by
+  // averaging horizontally adjacent samples (and, when line_is_subpel
+  // is set, also averaging vertically with the following line).
+  // first_sample_in_subpel selects which of the two 1.5x phases the
+  // first output sample falls on.  Called twice per row to cover 16
+  // outputs; see the note above this function.  NOTE(review): dead
+  // debug stores to a local buffer were removed here; they also passed
+  // a uint16_t(*)[8] where _mm_storeu_si128 expects a __m128i pointer.
+
+  __m128i control0, control1;
+
+  assert(first_sample_in_subpel < 2);
+  if( !first_sample_in_subpel){
+    control0 = _mm_setr_epi8( 0, 1, 0, 1, 2, 3, 4, 5,
+                              6, 7, 6, 7, 8, 9, 10, 11);
+
+    control1 = _mm_setr_epi8( 12, 13, 12, 13, 14, 15, 0, 1,
+                               2, 3, 2, 3, 4, 5, 6, 7);
+  }
+  else{
+
+    control0 = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 4, 5,
+                            6, 7, 8, 9, 10, 11, 10, 11);
+
+    control1 = _mm_setr_epi8(12, 13, 14, 15, 0, 1, 0, 1,
+                            2, 3, 4, 5, 6, 7, 6, 7);
+  }
+
+  // Load the first and second 8 values
+  __m128i data0 = _mm_loadu_si128((__m128i *)src );
+  __m128i data1 = _mm_loadu_si128((__m128i *)src+1 );
+
+  // Perform vertical interpolation if needed.  Only the sum is formed
+  // here; the final normalization below folds in the extra factor of 2.
+  if(line_is_subpel) {
+    const __m128i data0_1 = _mm_loadu_si128((__m128i *)(src + stride) );
+    const __m128i data1_1 = _mm_loadu_si128((__m128i *)(src + stride) + 1);
+
+    data0 = _mm_add_epi16(data0, data0_1);
+    data1 = _mm_add_epi16( data1, data1_1);
+  }
+
+  // Move the last two samples in data0 to data1.  While the ordering is not
+  // correct yet, this will give us the first six samples in the first register
+  // and the second six samples in the second register
+  const __m128i data3 = _mm_blend_epi16(data0, data1, 0b00111111);
+
+  // Shuffle the values so that we have the six samples in each register
+  // correctly ordered, and with the collocated samples duplicated
+  const __m128i data4 = _mm_shuffle_epi8( data0, control0);
+
+  const __m128i data5 = _mm_shuffle_epi8( data3, control1);
+
+  // Horizontal add: each pair of adjacent 16-bit lanes is summed,
+  // averaging the duplicated/neighboring samples.
+  const __m128i data6 = _mm_hadd_epi16(data4, data5);
+
+  // Normalize with rounding.  With vertical interpolation each sample
+  // carries a factor of 4 (2 vertical, 2 horizontal); otherwise only a
+  // factor of 2.
+  const __m128i data7 = _mm_add_epi16( data6, _mm_set1_epi16( line_is_subpel?2:1) );
+  const __m128i data8 = _mm_srli_epi16( data7, line_is_subpel?2:1);
+
+  return data8;
+
+}
+#endif
+#endif
+
 void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
                                    int width, int height, int stride,
                                    uint16_t *pred, int p_col, int p_row,
                                    int p_width, int p_height, int p_stride,
                                    int subsampling_x, int subsampling_y, int bd,
                                    ConvolveParams *conv_params, int16_t alpha,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                                   int16_t beta, int16_t gamma, int16_t delta,
+                                   const int x_step_qn, const int y_step_qn) {
+#else
                                    int16_t beta, int16_t gamma, int16_t delta) {
+#endif 
+
   __m128i tmp[15];
   int i, j, k;
   const int reduce_bits_horiz =
@@ -336,6 +580,17 @@
   const __m128i wt1 = _mm_set1_epi32(w1);
   const int use_wtd_comp_avg = is_uneven_wtd_comp_avg(conv_params);
 
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+  // Determine our stride
+#if !CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+  assert(x_step_qn == y_step_qn);
+#endif
+  const int x_conv_stride = x_step_qn >> SCALE_SUBPEL_BITS;
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+  const int mode_1_5x_flag = ( x_step_qn + ( 1 << (SCALE_SUBPEL_BITS - 2 ) ) ) >> (SCALE_SUBPEL_BITS -1 ) == 3 ? 1 : 0;
+#endif
+#endif
+
   /* Note: For this code to work, the left/right frame borders need to be
   extended by at least 13 pixels each. By the time we get here, other
   code will have set up this border, but we allow an explicit check
@@ -377,33 +632,161 @@
       // skip the expensive horizontal filter.
       if (ix4 <= -7) {
         for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+
           int iy = iy4 + k;
+
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+          // Converting iy from the current frame resolution
+          // to the reference frame resolution.  For an integer
+          // relationship, this results in a strided operation.
+          // Results are a bit undefined for non-integer factors.
+          //
+          // We chose not to convert ix4 above, since we would
+          // also need to convert the -7 value in this case.
+          // The end result would be unchanged.
+          iy = x_conv_stride * iy;
+#endif
           if (iy < 0)
             iy = 0;
           else if (iy > height - 1)
             iy = height - 1;
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+          int src_iy = -1;
+          uint16_t value;
+          if( mode_1_5x_flag){
+
+            // Determine the line in the reference image that corresponds to the desired iy
+            src_iy = clamp(3 * (iy4 + k) / 2, 0, height - 1);
+
+            //uint16_t value;
+            if(src_iy % 3 == 1 && src_iy < (height-1))
+              value = ( ref[src_iy * stride] + ref[(src_iy+1) * stride] + 1 ) >> 1;
+            else
+              value = ref[src_iy * stride];
+
+            tmp[k + 7] = _mm_set1_epi16(
+                (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
+                value * (1 << (FILTER_BITS - reduce_bits_horiz)));
+          }
+          else
+            tmp[k + 7] = _mm_set1_epi16(
+                (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
+                ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
+
+#else
           tmp[k + 7] = _mm_set1_epi16(
               (1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
               ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
+#endif
         }
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+      } else if ( (ix4*x_conv_stride) >= ((width/x_conv_stride + 6) * x_conv_stride) && !mode_1_5x_flag) {
+#else
+      } else if ( (ix4*x_conv_stride) >= width + (6*x_conv_stride) ) {
+#endif
+#else
       } else if (ix4 >= width + 6) {
+#endif
         for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+
           int iy = iy4 + k;
+
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+          // Converting iy from the current frame resolution
+          // to the reference frame resolution.  For an integer
+          // relationship, this results in a strided operation.
+          // Results are a bit undefined for non-integer factors.
+          iy = x_conv_stride * iy;
+#endif
           if (iy < 0)
             iy = 0;
           else if (iy > height - 1)
             iy = height - 1;
           tmp[k + 7] =
               _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
-                             ref[iy * stride + (width - 1)] *
+                             ref[iy * stride + width - width%x_conv_stride - x_conv_stride] *
                                  (1 << (FILTER_BITS - reduce_bits_horiz)));
         }
-      } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) {
-        const int out_of_boundary_left = -(ix4 - 6);
-        const int out_of_boundary_right = (ix4 + 8) - width;
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+      } else if ( ( (3*ix4/2) >= 3*(2*width/3 + 6)/2 ) && mode_1_5x_flag) {
 
         for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+
+          const int src_iy = clamp(3 * (iy4 + k) / 2, 0, height - 1);
+          const int sample_x = 3*(2*width/3 - 1) / 2;
+
+          uint16_t value;
+          if(src_iy % 3 == 1 && sample_x % 3 == 1 && src_iy < (height-1))
+            value = ( ref[src_iy * stride + sample_x]
+                     + ref[src_iy * stride + sample_x + 1]
+                     + ref[(src_iy+1) * stride + sample_x]
+                     + ref[(src_iy+1) * stride + sample_x + 1] + 2 ) / 4;
+          else if(src_iy % 3 == 1)
+            value = ( ref[src_iy * stride + sample_x]
+                     + ref[(src_iy+1) * stride + sample_x] + 1 ) / 2;
+          else if(sample_x % 3 == 1)
+            value = ( ref[src_iy * stride + sample_x]
+                     + ref[src_iy * stride + sample_x + 1] + 1 ) / 2;
+          else
+            value = ref[src_iy * stride + sample_x];
+
+          tmp[k + 7] =
+              _mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
+                             value *
+                                 (1 << (FILTER_BITS - reduce_bits_horiz)));
+        }
+#endif
+
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP  && !CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+      // Always use the code below if the reference frame and current frame have
+      // different resolutions.
+      } else if ( ( ((ix4 - 7) < 0) || ((ix4 + 9) > width)) || x_conv_stride != 1 ){
+#elif CONFIG_2D_SR_SUBSAMPLE_FOR_WARP && CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+      } else if ( ( ((ix4 - 7) < 0) || ((ix4 + 9) > width)) || x_conv_stride != 1 || mode_1_5x_flag){
+#else
+      } else if (((ix4 - 7) < 0) || ((ix4 + 9) > width)) {
+#endif
+        const int out_of_boundary_left = -(ix4 - 6);
+
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP && ! CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+        // The full resolution code may have an off-by-one error
+        const int out_of_boundary_right = (ix4 + 8) - width/x_conv_stride;
+#elif CONFIG_2D_SR_SUBSAMPLE_FOR_WARP && CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+        int out_of_boundary_right;
+        if( !mode_1_5x_flag)
+          out_of_boundary_right = (ix4 + 8) - width/x_conv_stride;
+        else
+          out_of_boundary_right = (ix4 + 8) - 2*width/3;
+#else
+        const int out_of_boundary_right = (ix4 + 8) - width;
+#endif
+
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+        // Converting ix4 from the current frame resolution
+        // to the reference frame resolution.  For an integer
+        // relationship, this results in a strided operation.
+        // Results are a bit undefined for non-integer factors.
+        ix4 = x_conv_stride * ix4;
+#endif
+        for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
+
           int iy = iy4 + k;
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+          // Converting iy from the current frame resolution
+          // to the reference frame resolution.  For an integer
+          // relationship, this results in a strided operation.
+          // Results are a bit undefined for non-integer factors.
+          iy = x_conv_stride * iy;
+
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+          // Converting iy from the current frame resolution
+          // to the reference frame resolution when the sample
+          // factor is 1.5x.
+          if(mode_1_5x_flag)
+            iy = 3 * iy / 2;
+#endif
+#endif
           if (iy < 0)
             iy = 0;
           else if (iy > height - 1)
@@ -411,11 +794,54 @@
           int sx = sx4 + beta * (k + 4);
 
           // Load source pixels
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+          __m128i src, src2;
+
+#if CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP
+          if(mode_1_5x_flag==1 && x_conv_stride==1) {
+
+            // Compute 3 * (ix4 - 7) / 2 with rounding toward -infinity
+            int ix4_src = 3 * (ix4 - 7) / 2;
+            ix4_src -= (ix4_src%3==-1);
+
+            src = interpolated_load_1_5x(ref + iy * stride + ix4_src,
+                                         ix4_src%3, iy%3==1, stride);
+
+            src2 = interpolated_load_1_5x(ref + iy * stride + ix4_src + 12,
+                                         ix4_src%3, iy%3==1, stride);
+          }
+          else if(x_conv_stride==1) {
+#else
+          if(x_conv_stride==1) {
+#endif
+            src  = _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
+            src2 = _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
+          }
+          else if(x_conv_stride==2 ) {
+            src = strided_load_2x(ref + iy * stride + ix4 - 7*2);
+            src2 = strided_load_2x(ref + iy * stride + ix4 + 1*2);
+          }
+          else if(x_conv_stride==3) {
+            src = strided_load_3x(ref + iy * stride + ix4 - 7*3);
+            src2 = strided_load_3x(ref + iy * stride + ix4 + 1*3);
+          }
+          else if(x_conv_stride==4) {
+            src = strided_load_4x(ref + iy * stride + ix4 - 7*4);
+            src2 = strided_load_4x(ref + iy * stride + ix4 + 1*4);
+          }
+          else if(x_conv_stride==6) {
+            src = strided_load_6x(ref + iy * stride + ix4 - 7*6);
+            src2 = strided_load_6x(ref + iy * stride + ix4 + 1*6);
+          }
+          else{
+            assert(0);
+          }
+#else
           const __m128i src =
               _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 - 7));
           const __m128i src2 =
               _mm_loadu_si128((__m128i *)(ref + iy * stride + ix4 + 1));
-
+#endif
           const __m128i src_01 = _mm_shuffle_epi8(
               src, _mm_loadu_si128((__m128i *)warp_highbd_arrange_bytes));
           const __m128i src2_01 = _mm_shuffle_epi8(
diff --git a/av1/common/x86/highbd_wiener_convolve_avx2.c b/av1/common/x86/highbd_wiener_convolve_avx2.c
index 04451db..6543ac1 100644
--- a/av1/common/x86/highbd_wiener_convolve_avx2.c
+++ b/av1/common/x86/highbd_wiener_convolve_avx2.c
@@ -2551,6 +2551,9 @@
   int col_base = col_offset + tskip_lead;
   const int tskip_length = tskip_lead + tskip_lag + 1;
   assert(col_base >= 0);
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  assert(tskip_sum_buf[col_base] >= 0);   
+#endif
   tskip_feature_accum[0] += tskip_sum_buf[col_base];
   col_base++;
   int cl = col_base - tskip_length;
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 71a5da3..4248f3d 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -76,9 +76,15 @@
 
 // This is needed by ext_tile related unit tests.
 #define EXT_TILE_DEBUG 1
+#if CONFIG_2D_SR_SCALE_EXT
+#define MC_TEMP_BUF_PELS                       \
+  (((MAX_SB_SIZE)*6 + (AOM_INTERP_EXTEND)*2) * \
+   ((MAX_SB_SIZE)*6 + (AOM_INTERP_EXTEND)*2))  
+#else  // CONFIG_2D_SR_SCALE_EXT
 #define MC_TEMP_BUF_PELS                       \
   (((MAX_SB_SIZE)*2 + (AOM_INTERP_EXTEND)*2) * \
    ((MAX_SB_SIZE)*2 + (AOM_INTERP_EXTEND)*2))
+#endif  // CONFIG_2D_SR_SCALE_EXT
 
 #if CONFIG_THROUGHPUT_ANALYSIS
 int64_t tot_ctx_syms = { 0 };
@@ -687,6 +693,7 @@
       (is_scaled || scaled_mv.col || scaled_mv.row || (frame_width & 0x7) ||
        (frame_height & 0x7))) {
 #endif  // CONFIG_OPTFLOW_REFINEMENT || CONFIG_TIP
+
     if (subpel_x_mv || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
       block->x0 -= AOM_INTERP_EXTEND - 1;
       block->x1 += AOM_INTERP_EXTEND;
@@ -724,7 +731,6 @@
                                      ,
                                      NULL
 #endif  // CONFIG_REFINEMV
-
                                      )) {
     // Get reference block pointer.
     const uint16_t *const buf_ptr =
@@ -742,6 +748,7 @@
            x_pad * (AOM_INTERP_EXTEND - 1);
   }
 }
+
 #if !CONFIG_REFINEMV
 static void dec_calc_subpel_params(
     const MV *const src_mv, InterPredParams *const inter_pred_params,
@@ -751,6 +758,7 @@
     int use_optflow_refinement,
 #endif  // CONFIG_OPTFLOW_REFINEMENT
     MV32 *scaled_mv, int *subpel_x_mv, int *subpel_y_mv) {
+
   const struct scale_factors *sf = inter_pred_params->scale_factors;
   struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf;
 #if CONFIG_OPTFLOW_REFINEMENT
@@ -788,19 +796,120 @@
     pos_x += SCALE_EXTRA_OFF;
     pos_y += SCALE_EXTRA_OFF;
 
+#if CONFIG_2D_SR_ZERO_PHASE
+    // TODO: Determine plane type from something other than ssx, ssy
+    if (sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += (ssx == 1) ? inter_pred_params->posx_offset[1]
+                          : inter_pred_params->posx_offset[0];
+    }
+    if (sf->y_scale_fp != REF_NO_SCALE) {
+      pos_y += (ssy == 1) ? inter_pred_params->posy_offset[1]
+                          : inter_pred_params->posy_offset[0];
+    }
+#elif CONFIG_2D_SR_MC_PHASE_FIX
+    if (ssx == 1 && sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += inter_pred_params->posx_offset[1];
+    }
+#endif
+
     const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
     const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
+
     const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
                        << SCALE_SUBPEL_BITS;
     const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
     pos_y = clamp(pos_y, top, bottom);
     pos_x = clamp(pos_x, left, right);
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, src_mv, bw, bh, use_optflow_refinement, inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+      int subbpel_pos_x = ((inter_pred_params->pix_col << SUBPEL_BITS) + mv_q4.col) << SCALE_EXTRA_BITS;
+      int subbpel_pos_y = ((inter_pred_params->pix_row << SUBPEL_BITS) + mv_q4.row) << SCALE_EXTRA_BITS;
+      subpel_params->subpel_x = subbpel_pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = subbpel_pos_y & SCALE_SUBPEL_MASK;
+    } else {
+      subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+    }
+#else
     subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
     subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+#endif
     subpel_params->xs = sf->x_step_q4;
     subpel_params->ys = sf->y_step_q4;
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      int scale = 0;
+      if (sf->x_scale_fp == REF_2x_SCALE) scale = 2;
+      if (sf->x_scale_fp == REF_3x_SCALE) scale = 3;
+      if (sf->x_scale_fp == REF_4x_SCALE) scale = 4;
+      if (sf->x_scale_fp == REF_6x_SCALE) scale = 6;
+      assert(scale != 0);
+
+      inter_pred_params->conv_params.stride_scale = scale;
+
+      int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+      int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+
+      const MV orig_mv_q4 = clamp_mv_to_umv_border_sb(
+          xd, src_mv, bw, bh, use_optflow_refinement,
+          inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+
+      orig_pos_x += orig_mv_q4.col;
+      orig_pos_y += orig_mv_q4.row;
+
+      orig_pos_y = clamp(((orig_pos_y >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale,
+                         top, bottom);
+      orig_pos_x = clamp(((orig_pos_x >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale,
+                         left, right);
+
+      // Get reference block top left coordinate.
+      block->x0 = orig_pos_x >> SCALE_SUBPEL_BITS;
+      block->y0 = orig_pos_y >> SCALE_SUBPEL_BITS;
+
+      // Get reference block bottom right coordinate.
+      block->x1 = ((orig_pos_x +
+                    (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+                   SCALE_SUBPEL_BITS) +
+                  scale;
+      block->y1 = ((orig_pos_y + (inter_pred_params->block_height - 1) *
+                                     subpel_params->ys) >>
+                   SCALE_SUBPEL_BITS) +
+                  scale;
+    } else {
+      inter_pred_params->conv_params.stride_scale = 1;
+
+      // Get reference block top left coordinate.
+      block->x0 = pos_x >> SCALE_SUBPEL_BITS;
+      block->y0 = pos_y >> SCALE_SUBPEL_BITS;
+
+      // Get reference block bottom right coordinate.
+      block->x1 =
+          ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+           SCALE_SUBPEL_BITS) +
+          1;
+      block->y1 = ((pos_y + (inter_pred_params->block_height - 1) *
+                                subpel_params->ys) >>
+                   SCALE_SUBPEL_BITS) +
+                  1;
+    }
+
+    MV temp_mv;
+      temp_mv = clamp_mv_to_umv_border_sb(xd, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+                                          use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                                          inter_pred_params->subsampling_x,
+                                          inter_pred_params->subsampling_y);
+      *scaled_mv = av1_scale_mv(&temp_mv, mi_x, mi_y, sf);
+      scaled_mv->row += SCALE_EXTRA_OFF;
+      scaled_mv->col += SCALE_EXTRA_OFF;
+
+      *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
+      *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+#else
     // Get reference block top left coordinate.
     block->x0 = pos_x >> SCALE_SUBPEL_BITS;
     block->y0 = pos_y >> SCALE_SUBPEL_BITS;
@@ -828,6 +937,7 @@
 
     *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
     *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+#endif
   } else {
     // Get block position in current frame.
     int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
@@ -920,6 +1030,7 @@
     int use_optflow_refinement,
 #endif  // CONFIG_OPTFLOW_REFINEMENT
     MV32 *scaled_mv, int *subpel_x_mv, int *subpel_y_mv) {
+
   const struct scale_factors *sf = inter_pred_params->scale_factors;
   struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf;
 
@@ -964,6 +1075,22 @@
     pos_x += SCALE_EXTRA_OFF;
     pos_y += SCALE_EXTRA_OFF;
 
+#if CONFIG_2D_SR_ZERO_PHASE
+    // TODO: Determine plane type from something other than ssx, ssy
+    if (sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += (ssx == 1) ? inter_pred_params->posx_offset[1]
+                          : inter_pred_params->posx_offset[0];
+    }
+    if (sf->y_scale_fp != REF_NO_SCALE) {
+      pos_y += (ssy == 1) ? inter_pred_params->posy_offset[1]
+                          : inter_pred_params->posy_offset[0];
+    }
+#elif CONFIG_2D_SR_MC_PHASE_FIX
+    if (ssx == 1 && sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += inter_pred_params->posx_offset[1];
+    }
+#endif
+
     const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
     const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
     const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
@@ -972,11 +1099,100 @@
     pos_y = clamp(pos_y, top, bottom);
     pos_x = clamp(pos_x, left, right);
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      const MV mv_q4 = tip_clamp_mv_to_umv_border_sb(inter_pred_params, src_mv, bw, bh, use_optflow_refinement, inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+      int subbpel_pos_x = ((inter_pred_params->pix_col << SUBPEL_BITS) + mv_q4.col) << SCALE_EXTRA_BITS;
+      int subbpel_pos_y = ((inter_pred_params->pix_row << SUBPEL_BITS) + mv_q4.row) << SCALE_EXTRA_BITS;
+      subpel_params->subpel_x = subbpel_pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = subbpel_pos_y & SCALE_SUBPEL_MASK;
+    } else {
+      subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+    }
+#else
     subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
     subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+#endif
+
     subpel_params->xs = sf->x_step_q4;
     subpel_params->ys = sf->y_step_q4;
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      int scale = 0;
+      if (sf->x_scale_fp == REF_2x_SCALE) scale = 2;
+      if (sf->x_scale_fp == REF_3x_SCALE) scale = 3;
+      if (sf->x_scale_fp == REF_4x_SCALE) scale = 4;
+      if (sf->x_scale_fp == REF_6x_SCALE) scale = 6;
+      assert(scale != 0);
+      inter_pred_params->conv_params.stride_scale = scale;
+
+      int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+      int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+
+    const MV orig_mv_q4 = tip_clamp_mv_to_umv_border_sb(
+          inter_pred_params, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+          use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+        inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+
+      orig_pos_x += orig_mv_q4.col;
+      orig_pos_y += orig_mv_q4.row;
+
+      orig_pos_y =
+          clamp(((orig_pos_y >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale, top,
+                bottom);
+      orig_pos_x =
+          clamp(((orig_pos_x >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale, left,
+                right);
+
+
+      // Get reference block top left coordinate.
+      block->x0 = orig_pos_x >> SCALE_SUBPEL_BITS;
+      block->y0 = orig_pos_y >> SCALE_SUBPEL_BITS;
+
+      // Get reference block bottom right coordinate.
+      block->x1 = ((orig_pos_x +
+                    (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+           SCALE_SUBPEL_BITS) +
+          scale;
+      block->y1 = ((orig_pos_y + (inter_pred_params->block_height - 1) *
+                                subpel_params->ys) >>
+                   SCALE_SUBPEL_BITS) +
+                  scale;
+    } else {
+      inter_pred_params->conv_params.stride_scale = 1;
+
+      // Get reference block top left coordinate.
+      block->x0 = pos_x >> SCALE_SUBPEL_BITS;
+      block->y0 = pos_y >> SCALE_SUBPEL_BITS;
+
+      // Get reference block bottom right coordinate.
+      block->x1 =
+          ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+           SCALE_SUBPEL_BITS) +
+          1;
+      block->y1 = ((pos_y + (inter_pred_params->block_height - 1) *
+                                subpel_params->ys) >>
+                   SCALE_SUBPEL_BITS) +
+                  1;
+    }
+      MV temp_mv;
+    temp_mv = tip_clamp_mv_to_umv_border_sb(inter_pred_params, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+                                            use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                                            inter_pred_params->subsampling_x,
+                                            inter_pred_params->subsampling_y);
+      *scaled_mv = av1_scale_mv(&temp_mv, mi_x, mi_y, sf);
+      scaled_mv->row += SCALE_EXTRA_OFF;
+      scaled_mv->col += SCALE_EXTRA_OFF;
+
+      *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
+      *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+#else
     // Get reference block top left coordinate.
     block->x0 = pos_x >> SCALE_SUBPEL_BITS;
     block->y0 = pos_y >> SCALE_SUBPEL_BITS;
@@ -1011,6 +1227,7 @@
 
     *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
     *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+#endif
   } else {
     // Get block position in current frame.
     int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
@@ -2421,6 +2638,40 @@
 }
 #endif
 
+
+#if CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB
+// Decoder-side counterpart of write_modes_sb_loop_restoration(): reads the
+// loop-restoration unit coefficients (and, under
+// CONFIG_HIGH_PASS_CROSS_WIENER_FILTER, cross-filter coefficients) for every
+// restoration unit whose corner falls inside the superblock at
+// (mi_row, mi_col). Called per-SB over the *scaled* tile extent when superres
+// is active, so LR syntax is read tile-based rather than interleaved with
+// partition decoding. Side effect only (advances `reader`); no return value.
+static AOM_INLINE void decode_partition_loop_restoration(AV1Decoder *const pbi,
+	ThreadData *const td, int mi_row,
+	int mi_col, aom_reader *reader,
+	BLOCK_SIZE bsize) {
+	AV1_COMMON *const cm = &pbi->common;
+	const int num_planes = av1_num_planes(cm);
+	DecoderCodingBlock *const dcb = &td->dcb;
+	MACROBLOCKD *const xd = &dcb->xd;
+	// This helper is only invoked at superblock granularity.
+	assert(bsize == cm->seq_params.sb_size);
+
+	for (int plane = 0; plane < num_planes; plane++) {
+		int rcol0, rcol1, rrow0, rrow1;
+		// Skip planes with no restoration at all; otherwise map this SB to
+		// the half-open RU rectangle [rcol0,rcol1) x [rrow0,rrow1) it owns.
+		if ((cm->rst_info[plane].frame_restoration_type != RESTORE_NONE
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                     || cm->rst_info[plane].frame_cross_restoration_type != RESTORE_NONE
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                        ) &&
+			av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
+				&rcol0, &rcol1, &rrow0, &rrow1)) {
+			const int rstride = cm->rst_info[plane].horz_units_per_tile;
+			for (int rrow = rrow0; rrow < rrow1; ++rrow) {
+				for (int rcol = rcol0; rcol < rcol1; ++rcol) {
+					// Row-major RU index within the tile.
+					const int runit_idx = rcol + rrow * rstride;
+					loop_restoration_read_sb_coeffs(cm, xd, reader, plane, runit_idx);
+				}
+			}
+		}
+	}
+}
+#endif
+
+
 // TODO(slavarnway): eliminate bsize and subsize in future commits
 static AOM_INLINE void decode_partition(AV1Decoder *const pbi,
                                         ThreadData *const td, int mi_row,
@@ -2480,6 +2731,9 @@
     const int plane_start = get_partition_plane_start(xd->tree_type);
     const int plane_end =
         get_partition_plane_end(xd->tree_type, av1_num_planes(cm));
+#if CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB
+    if (!av1_superres_scaled(cm)) {
+#endif
     for (int plane = plane_start; plane < plane_end; ++plane) {
       int rcol0, rcol1, rrow0, rrow1;
       if ((cm->rst_info[plane].frame_restoration_type != RESTORE_NONE
@@ -2498,6 +2752,9 @@
         }
       }
     }
+#if CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB
+    }
+#endif
 
     ptree->bsize = bsize;
     ptree->mi_row = mi_row;
@@ -3002,7 +3259,7 @@
 #if CONFIG_LR_FLEX_SYNTAX
     uint8_t plane_lr_tools_disable_mask =
         cm->seq_params.lr_tools_disable_mask[p > 0];
-#if CONFIG_PC_WIENER
+#if CONFIG_PC_WIENER && !CONFIG_2D_SR_PC_WIENER_ENABLE_FOR_SR
     // If superres is used turn off PC_WIENER since tx_skip values will
     // be misaligned.
     if (av1_superres_scaled(cm))
@@ -3093,7 +3350,11 @@
   const int frame_height = cm->superres_upscaled_height;
   set_restoration_unit_size(frame_width, frame_height,
                             cm->seq_params.subsampling_x,
+#if CONFIG_2D_SR_RESTORATION_FLEXIBLE_RU_SIZE_SCALE
+                            cm->seq_params.subsampling_y, cm->rst_info, cm->superres_scale_denominator);
+#else                            
                             cm->seq_params.subsampling_y, cm->rst_info);
+#endif    
   int size = cm->rst_info[0].max_restoration_unit_size;
 
   cm->rst_info[0].restoration_unit_size =
@@ -3108,6 +3369,8 @@
         cm->rst_info[0].restoration_unit_size = size >> 2;
     }
   }
+
+
   if (num_planes > 1) {
     cm->rst_info[1].restoration_unit_size =
         cm->rst_info[1].max_restoration_unit_size;
@@ -3135,7 +3398,6 @@
       cm->rst_info[p].restoration_unit_size = sb_size;
 
     RestorationInfo *rsi = &cm->rst_info[0];
-
     if (sb_size == 64) {
       rsi->restoration_unit_size <<= aom_rb_read_bit(rb);
     }
@@ -3890,11 +4152,25 @@
   cm->superres_upscaled_width = *width;
   cm->superres_upscaled_height = *height;
   cm->superres_scale_denominator = SCALE_NUMERATOR;
+#if CONFIG_2D_SR
+  cm->superres_scale_numerator = SCALE_NUMERATOR;
+#endif  // CONFIG_2D_SR
 
   const SequenceHeader *const seq_params = &cm->seq_params;
   if (!seq_params->enable_superres) return;
 
   if (aom_rb_read_bit(rb)) {
+#if CONFIG_2D_SR
+    cm->superres_scale_index =
+        (uint8_t)aom_rb_read_literal(rb, SUPERRES_SCALE_BITS);
+    cm->superres_scale_denominator =
+        superres_scales[cm->superres_scale_index].scale_denom;
+    cm->superres_scale_numerator =
+        superres_scales[cm->superres_scale_index].scale_num;
+    av1_calculate_scaled_superres_size(width, height,
+                                       cm->superres_scale_denominator,
+                                       cm->superres_scale_numerator);
+#else   // CONFIG_2D_SR
     cm->superres_scale_denominator =
         (uint8_t)aom_rb_read_literal(rb, SUPERRES_SCALE_BITS);
     cm->superres_scale_denominator += SUPERRES_SCALE_DENOMINATOR_MIN;
@@ -3902,6 +4178,7 @@
     // resized correctly
     av1_calculate_scaled_superres_size(width, height,
                                        cm->superres_scale_denominator);
+#endif  // CONFIG_2D_SR
   } else {
     // 1:1 scaling - ie. no scaling, scale not provided
     cm->superres_scale_denominator = SCALE_NUMERATOR;
@@ -3972,6 +4249,16 @@
     tip_frame_buf->render_width = cm->render_width;
     tip_frame_buf->render_height = cm->render_height;
   }
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  if (aom_realloc_frame_buffer(
+          &cm->tip_ref.upscaled_tip_frame_buf, cm->superres_upscaled_width,
+          cm->superres_upscaled_height, seq_params->subsampling_x,
+          seq_params->subsampling_y, AOM_DEC_BORDER_IN_PIXELS,
+          cm->features.byte_alignment, NULL, NULL, NULL)) {
+    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+                       "Failed to allocate frame buffer");
+  }
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 }
 #endif  // CONFIG_TIP
 
@@ -4759,6 +5046,29 @@
 #endif  // CONFIG_WIENER_NONSEP
   );
 
+#if CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB
+  if (av1_superres_scaled(cm)) {
+	  int scaled_mi_row_end = tile_info.mi_row_end * cm->superres_scale_denominator / SCALE_NUMERATOR;
+	  int scaled_mi_col_end = tile_info.mi_col_end * cm->superres_scale_denominator / SCALE_NUMERATOR;
+	  for (int mi_row = tile_info.mi_row_start; mi_row < scaled_mi_row_end;
+		  mi_row += cm->seq_params.mib_size) {
+//		  av1_zero_left_context(xd);
+		  for (int mi_col = tile_info.mi_col_start; mi_col < scaled_mi_col_end;
+			  mi_col += cm->seq_params.mib_size) {
+//			  av1_reset_is_mi_coded_map(xd, cm->seq_params.mib_size);
+
+ 			  decode_partition_loop_restoration(pbi, td, mi_row, mi_col, td->bit_reader,
+			  cm->seq_params.sb_size);
+
+			  if (aom_reader_has_overflowed(td->bit_reader)) {
+				  aom_merge_corrupted_flag(&dcb->corrupted, 1);
+				  return;
+			  }
+		  }
+	  }
+  }
+#endif
+
   for (int mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
        mi_row += cm->seq_params.mib_size) {
     av1_zero_left_context(xd);
@@ -5652,6 +5962,7 @@
       thread_data->error_info.setjmp = 0;
     }
   }
+
   const int buf_size = MC_TEMP_BUF_PELS << 1;
   for (worker_idx = 0; worker_idx < pbi->max_threads - 1; ++worker_idx) {
     DecWorkerData *const thread_data = pbi->thread_data + worker_idx;
@@ -7465,11 +7776,13 @@
           aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
                              "Invalid TIP mode.");
         }
+#if !CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
         if (features->tip_frame_mode == TIP_FRAME_AS_OUTPUT &&
             av1_superres_scaled(cm)) {
           aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
                              "Invalid TIP Direct mode with superres.");
         }
+#endif  // !CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 
         if (features->tip_frame_mode && cm->seq_params.enable_tip_hole_fill) {
           features->allow_tip_hole_fill = aom_rb_read_bit(rb);
@@ -7920,6 +8233,9 @@
     av1_copy_tip_frame_tmvp_mvs(cm);
     aom_yv12_copy_frame(&cm->tip_ref.tip_frame->buf, &cm->cur_frame->buf,
                         num_planes);
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+    superres_post_decode(pbi);
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
     for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
       cm->global_motion[i] = default_warp_params;
       cm->cur_frame->global_motion[i] = default_warp_params;
@@ -8097,6 +8413,7 @@
   }
 
   if (!is_global_intrabc_allowed(cm) && !tiles->single_tile_decoding) {
+
     if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) {
       if (pbi->num_workers > 1) {
         av1_loop_filter_frame_mt(
@@ -8191,7 +8508,11 @@
         !do_cdef && !do_superres;
 
     if (!optimized_loop_restoration) {
+#if CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR
+      if (do_loop_restoration && !do_superres)
+#else      
       if (do_loop_restoration)
+#endif      
         av1_loop_restoration_save_boundary_lines(&pbi->common.cur_frame->buf,
                                                  cm, 0);
 
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index a0eaf64..b605224 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1896,7 +1896,9 @@
 
 #if CONFIG_ADAPTIVE_MVD
   int use_mv_class_offset = 1;
+#if !CONFIG_ADAPTIVE_MVD_TEST2
   if (mv_class > MV_CLASS_0 && is_adaptive_mvd) use_mv_class_offset = 0;
+#endif  
   if (use_mv_class_offset) {
 #endif  // CONFIG_ADAPTIVE_MVD
     // Integer part
@@ -1917,7 +1919,7 @@
     const int n = mv_class + CLASS0_BITS - 1;  // number of bits
     d = 0;
     for (int i = 0; i < n; ++i) d |= 1 << i;
-    mag = CLASS0_SIZE << (mv_class + 2);
+    mag = CLASS0_SIZE << (mv_class + 2);   
   }
 #endif  // CONFIG_ADAPTIVE_MVD
 
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index 4bb23aa..61b6d66 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -144,6 +144,10 @@
 static INLINE void dec_init_tip_ref_frame(AV1_COMMON *const cm) {
   TIP *tip_ref = &cm->tip_ref;
   tip_ref->tip_frame = aom_calloc(1, sizeof(*tip_ref->tip_frame));
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  memset(&tip_ref->upscaled_tip_frame_buf, 0,
+         sizeof(tip_ref->upscaled_tip_frame_buf));
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 }
 
 static INLINE void dec_free_tip_ref_frame(AV1_COMMON *const cm) {
@@ -155,6 +159,9 @@
   aom_free_frame_buffer(&cm->tip_ref.tip_frame->buf);
   aom_free(cm->tip_ref.tip_frame);
   cm->tip_ref.tip_frame = NULL;
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  aom_free_frame_buffer(&cm->tip_ref.upscaled_tip_frame_buf);
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 }
 
 #if CONFIG_OPTFLOW_ON_TIP
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index f0d06ab..847c077 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -2904,7 +2904,7 @@
   if (!mbmi->skip_txfm[xd->tree_type == CHROMA_PART]) {
     write_tokens_b(cpi, w, tok, tok_end);
   }
-#if CONFIG_PC_WIENER
+#if CONFIG_PC_WIENER && !CONFIG_2D_SR_SET_TX_SKIP_ZERO
   else if (!is_global_intrabc_allowed(cm) && !cm->features.coded_lossless) {
     // Assert only when LR is enabled.
     assert(1 == av1_get_txk_skip(cm, xd->mi_row, xd->mi_col, 0, 0, 0));
@@ -3032,6 +3032,41 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 }
 
+
+#if CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB
+// Encoder-side counterpart of decode_partition_loop_restoration(): writes the
+// loop-restoration unit coefficients (and, under
+// CONFIG_HIGH_PASS_CROSS_WIENER_FILTER, cross-filter coefficients) for every
+// restoration unit whose corner falls inside the superblock at
+// (mi_row, mi_col). Used when superres is active so LR syntax is emitted
+// tile-based, decoupled from the normal per-SB mode writing loop. Side effect
+// only (appends to `w`); no return value.
+static AOM_INLINE void write_modes_sb_loop_restoration(
+	AV1_COMP *const cpi, aom_writer *const w,
+	int mi_row, int mi_col, BLOCK_SIZE bsize) {
+	const AV1_COMMON *const cm = &cpi->common;
+	const int num_planes = av1_num_planes(cm);
+	MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+	// This helper is only invoked at superblock granularity.
+	assert(bsize == cm->seq_params.sb_size);
+
+	for (int plane = 0; plane < num_planes; plane++) {
+		int rcol0, rcol1, rrow0, rrow1;
+		// Skip planes with no restoration at all; otherwise map this SB to
+		// the half-open RU rectangle [rcol0,rcol1) x [rrow0,rrow1) it owns.
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                if ((cm->rst_info[plane].frame_restoration_type != RESTORE_NONE ||
+                     cm->rst_info[plane].frame_cross_restoration_type != RESTORE_NONE) &&
+#else
+		if (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE &&
+#endif
+			av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
+				&rcol0, &rcol1, &rrow0, &rrow1)) {
+			const int rstride = cm->rst_info[plane].horz_units_per_tile;
+			for (int rrow = rrow0; rrow < rrow1; ++rrow) {
+				for (int rcol = rcol0; rcol < rcol1; ++rcol) {
+					// Row-major RU index within the tile.
+					const int runit_idx = rcol + rrow * rstride;
+					const RestorationUnitInfo *rui =
+						&cm->rst_info[plane].unit_info[runit_idx];
+					loop_restoration_write_sb_coeffs(cm, xd, rui, w, plane,
+						cpi->td.counts);
+				}
+			}
+		}
+	}
+}
+#endif
+
 static AOM_INLINE void write_modes_sb(
     AV1_COMP *const cpi, const TileInfo *const tile, aom_writer *const w,
     const TokenExtra **tok, const TokenExtra *const tok_end,
@@ -3064,6 +3099,9 @@
   const int plane_start = get_partition_plane_start(xd->tree_type);
   const int plane_end =
       get_partition_plane_end(xd->tree_type, av1_num_planes(cm));
+#if CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB
+  if (!av1_superres_scaled(cm)) {
+#endif
   for (int plane = plane_start; plane < plane_end; ++plane) {
     int rcol0, rcol1, rrow0, rrow1;
 #if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
@@ -3086,6 +3124,9 @@
       }
     }
   }
+#if CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB
+  }
+#endif
 
 #if CONFIG_EXT_RECUR_PARTITIONS
   write_partition(cm, xd, mi_row, mi_col, partition, bsize, ptree, ptree_luma,
@@ -3320,7 +3361,21 @@
       av1_reset_loop_filter_delta(xd, num_planes);
     }
   }
-
+#if CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB
+  if (av1_superres_scaled(cm)) {
+	  int scaled_mi_row_end = mi_row_end * cm->superres_scale_denominator / SCALE_NUMERATOR;
+	  int scaled_mi_col_end = mi_col_end * cm->superres_scale_denominator / SCALE_NUMERATOR;
+	  for (int mi_row = mi_row_start; mi_row < scaled_mi_row_end;
+		  mi_row += cm->seq_params.mib_size) {
+//		  av1_zero_left_context(xd);
+		  for (int mi_col = mi_col_start; mi_col < scaled_mi_col_end;
+			  mi_col += cm->seq_params.mib_size) {
+//			  av1_reset_is_mi_coded_map(xd, cm->seq_params.mib_size);
+			  write_modes_sb_loop_restoration(cpi, w, mi_row, mi_col, cm->seq_params.sb_size);
+		  }
+	  }
+  }
+#endif
   for (int mi_row = mi_row_start; mi_row < mi_row_end;
        mi_row += cm->seq_params.mib_size) {
     const int sb_row_in_tile =
@@ -3532,7 +3587,7 @@
     RestorationInfo *rsi = &cm->rst_info[0];
 
     assert(rsi->restoration_unit_size >= sb_size);
-    assert(RESTORATION_UNITSIZE_MAX == 256);
+	assert(RESTORATION_UNITSIZE_MAX == 256);
     if (sb_size == 64) {
       aom_wb_write_bit(wb, rsi->restoration_unit_size > 64);
     }
@@ -3540,7 +3595,6 @@
       aom_wb_write_bit(wb, rsi->restoration_unit_size > 128);
     }
   }
-
   if (num_planes > 1) {
     int s = AOMMIN(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y);
     if (s && !chroma_none) {
@@ -4396,21 +4450,47 @@
                                             struct aom_write_bit_buffer *wb) {
   const SequenceHeader *const seq_params = &cm->seq_params;
   if (!seq_params->enable_superres) {
+#if CONFIG_2D_SR
+    assert(cm->superres_scale_denominator == cm->superres_scale_numerator);
+#else   // CONFIG_2D_SR
     assert(cm->superres_scale_denominator == SCALE_NUMERATOR);
+#endif  // CONFIG_2D_SR
     return;
   }
 
   // First bit is whether to to scale or not
+#if CONFIG_2D_SR
+  if (cm->superres_scale_denominator == cm->superres_scale_numerator) {
+#else                         // CONFIG_2D_SR
   if (cm->superres_scale_denominator == SCALE_NUMERATOR) {
+#endif                        // CONFIG_2D_SR
     aom_wb_write_bit(wb, 0);  // no scaling
   } else {
     aom_wb_write_bit(wb, 1);  // scaling, write scale factor
+#if CONFIG_2D_SR
+    assert(cm->superres_scale_denominator > cm->superres_scale_numerator);
+#if CONFIG_2D_SR_SCALE_EXT
+    // Current across-scale prediction can handle downsampling factor <= 6
+    assert(cm->superres_scale_denominator <= 6 * cm->superres_scale_numerator);
+#else  // CONFIG_2D_SR_SCALE_EXT
+    // Current across-scale prediction can handle downsampling factor <= 2
+    assert(cm->superres_scale_denominator <= 2 * cm->superres_scale_numerator);
+#endif  // CONFIG_2D_SR_SCALE_EXT
+    assert(cm->superres_scale_index >= 0);
+    assert(cm->superres_scale_index < SUPERRES_SCALES);
+    assert(cm->superres_scale_denominator ==
+           superres_scales[cm->superres_scale_index].scale_denom);
+    assert(cm->superres_scale_numerator ==
+           superres_scales[cm->superres_scale_index].scale_num);
+    aom_wb_write_literal(wb, cm->superres_scale_index, SUPERRES_SCALE_BITS);
+#else   // CONFIG_2D_SR
     assert(cm->superres_scale_denominator >= SUPERRES_SCALE_DENOMINATOR_MIN);
     assert(cm->superres_scale_denominator <
            SUPERRES_SCALE_DENOMINATOR_MIN + (1 << SUPERRES_SCALE_BITS));
     aom_wb_write_literal(
         wb, cm->superres_scale_denominator - SUPERRES_SCALE_DENOMINATOR_MIN,
         SUPERRES_SCALE_BITS);
+#endif  // CONFIG_2D_SR
   }
 }
 
@@ -5362,8 +5442,10 @@
 #endif  // CONFIG_PEF
 #if CONFIG_TIP
       if (cm->seq_params.enable_tip) {
+#if !CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
         assert(IMPLIES(av1_superres_scaled(cm),
                        features->tip_frame_mode != TIP_FRAME_AS_OUTPUT));
+#endif  // !CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
         aom_wb_write_literal(wb, features->tip_frame_mode, 2);
         if (features->tip_frame_mode && cm->seq_params.enable_tip_hole_fill) {
           aom_wb_write_bit(wb, features->allow_tip_hole_fill);
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index a12f827..292458b 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -448,15 +448,27 @@
   return rd;
 }
 
+#if CONFIG_2D_SR_MC_PHASE_FIX
+static AOM_INLINE void get_inter_predictor_masked_compound_y(
+    MACROBLOCK *x, const BLOCK_SIZE bsize, uint16_t *pred0, uint16_t *pred1,
+    int16_t *residual1, int16_t *diff10, int stride,
+    const struct AV1Common *const cm) {
+#else
 static AOM_INLINE void get_inter_predictor_masked_compound_y(
     MACROBLOCK *x, const BLOCK_SIZE bsize, uint16_t *pred0, uint16_t *pred1,
     int16_t *residual1, int16_t *diff10, int stride) {
+#endif
   MACROBLOCKD *xd = &x->e_mbd;
   const int bw = block_size_wide[bsize];
   const int bh = block_size_high[bsize];
   // get inter predictors to use for masked compound modes
+#if CONFIG_2D_SR_MC_PHASE_FIX
+  av1_build_inter_predictor_single_buf_y(xd, bsize, 0, pred0, stride, cm);
+  av1_build_inter_predictor_single_buf_y(xd, bsize, 1, pred1, stride, cm);
+#else
   av1_build_inter_predictor_single_buf_y(xd, bsize, 0, pred0, stride);
   av1_build_inter_predictor_single_buf_y(xd, bsize, 1, pred1, stride);
+#endif
   const struct buf_2d *const src = &x->plane[0].src;
 
   aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, pred1,
@@ -746,8 +758,8 @@
     int rate_sum, skip_txfm_sb;
     int64_t dist_sum, skip_sse_sb;
     // get negative of mask
-    const uint8_t *mask =
-        av1_get_contiguous_soft_mask(mbmi->interintra_wedge_index, 1, bsize);
+	const uint8_t *mask =
+		av1_get_contiguous_soft_mask(mbmi->interintra_wedge_index, 1, bsize);
     av1_compound_single_motion_search(cpi, x, bsize, &tmp_mv->as_mv,
 #if CONFIG_JOINT_MVD
                                       &tmp_mv->as_mv,
@@ -1155,8 +1167,13 @@
   // this may increase memory requirements as compound segment mask needs to be
   // stored in each record.
   if (*calc_pred_masked_compound) {
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    get_inter_predictor_masked_compound_y(x, bsize, pred0, pred1, residual1,
+                                          diff10, stride, cm);
+#else
     get_inter_predictor_masked_compound_y(x, bsize, pred0, pred1, residual1,
                                           diff10, stride);
+#endif
     *calc_pred_masked_compound = 0;
   }
   if (cpi->sf.inter_sf.prune_wedge_pred_diff_based &&
@@ -1239,9 +1256,15 @@
       CompoundTypeRdBuffers tmp_buf;
       int64_t tmp_rd = INT64_MAX;
       alloc_compound_type_rd_buffers_no_check(&tmp_buf);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+      get_inter_predictor_masked_compound_y(x, bsize, tmp_buf.pred0,
+                                            tmp_buf.pred1, tmp_buf.residual1,
+                                            tmp_buf.diff10, stride, cm);
+#else
       get_inter_predictor_masked_compound_y(x, bsize, tmp_buf.pred0,
                                             tmp_buf.pred1, tmp_buf.residual1,
                                             tmp_buf.diff10, stride);
+#endif
 
       tmp_rd = pick_interinter_mask[compound_type - COMPOUND_WEDGE](
           cpi, x, bsize, tmp_buf.pred0, tmp_buf.pred1, tmp_buf.residual1,
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index 06809d7..fac10d5 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -847,6 +847,7 @@
       arf_src_index = gf_group->arf_src_offset[gf_group->index];
     }
   }
+
   // Save the pointer to the original source image.
   YV12_BUFFER_CONFIG *source_buffer = frame_input->source;
   // apply filtering to frame
@@ -854,6 +855,7 @@
   if (apply_filtering) {
     // TODO(bohanli): figure out why we need frame_type in cm here.
     cm->current_frame.frame_type = frame_params->frame_type;
+
     const int code_arf =
         av1_temporal_filter(cpi, arf_src_index, &show_existing_alt_ref);
     if (code_arf) {
@@ -905,6 +907,9 @@
 
   // Set frame_input source to true source for psnr calculation.
   if (apply_filtering && is_psnr_calc_enabled(cpi)) {
+#if 0
+printf("invoking in denoise_and_encode() - 1\n");
+#endif
     cpi->source =
         av1_scale_if_required(cm, source_buffer, &cpi->scaled_source,
                               cm->features.interp_filter, 0, false, true);
@@ -982,6 +987,7 @@
 
   struct lookahead_entry *source = NULL;
   struct lookahead_entry *last_source = NULL;
+
   if (frame_params.show_existing_frame) {
     source = av1_lookahead_pop(cpi->lookahead, flush, cpi->compressor_stage);
     frame_params.show_frame = 1;
@@ -998,7 +1004,11 @@
   }
   // Source may be changed if temporal filtered later.
   frame_input.source = &source->img;
+#if 0
+printf("av1_encode_strategy(): frame_input.source->y_crop_width=%d, frame_input.source->y_crop_height=%d\n", frame_input.source->y_crop_width, frame_input.source->y_crop_height);
+#endif
   frame_input.last_source = last_source != NULL ? &last_source->img : NULL;
+
   frame_input.ts_duration = source->ts_end - source->ts_start;
   // Save unfiltered source. It is used in av1_get_second_pass_params().
   cpi->unfiltered_source = frame_input.source;
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index d755568..c07fc23 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -1354,6 +1354,22 @@
     pos_x += SCALE_EXTRA_OFF;
     pos_y += SCALE_EXTRA_OFF;
 
+#if CONFIG_2D_SR_ZERO_PHASE
+    // TODO: Determine plane type from something other than ssx, ssy
+    if (sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += (ssx == 1) ? inter_pred_params->posx_offset[1]
+                          : inter_pred_params->posx_offset[0];
+    }
+    if (sf->y_scale_fp != REF_NO_SCALE) {
+      pos_y += (ssy == 1) ? inter_pred_params->posy_offset[1]
+                          : inter_pred_params->posy_offset[0];
+    }
+#elif CONFIG_2D_SR_MC_PHASE_FIX
+    if (ssx == 1 && sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += inter_pred_params->posx_offset[1];
+    }
+#endif
+
     const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
     const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
     const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
@@ -1362,11 +1378,167 @@
     pos_y = clamp(pos_y, top, bottom);
     pos_x = clamp(pos_x, left, right);
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width : inter_pred_params->block_width;
+      const int bh = use_optflow_refinement ? inter_pred_params->orig_block_height : inter_pred_params->block_height;
+
+      MV mv_q4;
+      if (use_optflow_refinement) {
+        // optflow refinement always returns MVs with 1/16 precision so it is
+        // not necessary to shift the MV before clamping
+        mv_q4.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED(src_mv->row * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + inter_pred_params->subsampling_y);
+        mv_q4.col = (int16_t)ROUND_POWER_OF_TWO_SIGNED(src_mv->col * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + inter_pred_params->subsampling_x);
+      } else {
+        mv_q4.row = (int16_t)(src_mv->row * (1 << (1 - inter_pred_params->subsampling_y)));
+        mv_q4.col = (int16_t)(src_mv->col * (1 << (1 - inter_pred_params->subsampling_x)));
+      }
+
+      const int spel_left = (AOM_INTERP_EXTEND + bw) << SUBPEL_BITS;
+      const int spel_right = spel_left - SUBPEL_SHIFTS;
+      const int spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS;
+      const int spel_bottom = spel_top - SUBPEL_SHIFTS;
+
+      const SubpelMvLimits mv_limits = {
+        inter_pred_params->dist_to_left_edge * (1 << (1 - inter_pred_params->subsampling_x)) - spel_left,
+        inter_pred_params->dist_to_right_edge * (1 << (1 - inter_pred_params->subsampling_x)) + spel_right,
+        inter_pred_params->dist_to_top_edge * (1 << (1 - inter_pred_params->subsampling_y)) - spel_top,
+        inter_pred_params->dist_to_bottom_edge * (1 << (1 - inter_pred_params->subsampling_y)) + spel_bottom
+      };
+
+      clamp_mv(&mv_q4, &mv_limits);
+
+      int subpel_pos_x = ((inter_pred_params->pix_col << SUBPEL_BITS) + mv_q4.col) << SCALE_EXTRA_BITS;
+      int subpel_pos_y = ((inter_pred_params->pix_row << SUBPEL_BITS) + mv_q4.row) << SCALE_EXTRA_BITS;
+      subpel_params->subpel_x = subpel_pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = subpel_pos_y & SCALE_SUBPEL_MASK;
+    } else {
+      subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+    }
+#else
     subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
     subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+#endif
     subpel_params->xs = sf->x_step_q4;
     subpel_params->ys = sf->y_step_q4;
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      int scale = 0;
+      if (sf->x_scale_fp == REF_2x_SCALE) scale = 2;
+      if (sf->x_scale_fp == REF_3x_SCALE) scale = 3;
+      if (sf->x_scale_fp == REF_4x_SCALE) scale = 4;
+      if (sf->x_scale_fp == REF_6x_SCALE) scale = 6;
+      assert(scale != 0);
+      inter_pred_params->conv_params.stride_scale = scale;
+#if CONFIG_2D_SR_CLAMP_MV_FOR_TIP
 
+      int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+      int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+
+#if CONFIG_OPTFLOW_REFINEMENT
+      MV clamped_mv;
+      if (use_optflow_refinement) {
+        clamped_mv.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED(
+            src_mv->row * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + ssy);
+        clamped_mv.col = (int16_t)ROUND_POWER_OF_TWO_SIGNED(
+            src_mv->col * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + ssx);
+      } else {
+        clamped_mv.row = (int16_t)(src_mv->row * (1 << (1 - ssy)));
+        clamped_mv.col = (int16_t)(src_mv->col * (1 << (1 - ssx)));
+      }
+#endif
+
+      const int spel_left = (AOM_INTERP_EXTEND + inter_pred_params->block_width)
+                            << SUBPEL_BITS;
+      const int spel_right = spel_left - SUBPEL_SHIFTS;
+      const int spel_top = (AOM_INTERP_EXTEND + inter_pred_params->block_height)
+                           << SUBPEL_BITS;
+      const int spel_bottom = spel_top - SUBPEL_SHIFTS;
+
+      int mi_row = inter_pred_params->pix_row /
+                   (MI_SIZE >> inter_pred_params->subsampling_y);
+      int mi_col = inter_pred_params->pix_col /
+                   (MI_SIZE >> inter_pred_params->subsampling_x);
+      int mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE);
+      int mb_to_bottom_edge =
+          GET_MV_SUBPEL((inter_pred_params->mi_rows - mi_row) * MI_SIZE -
+                        inter_pred_params->block_height);
+      int mb_to_left_edge = -GET_MV_SUBPEL((mi_col * MI_SIZE));
+      int mb_to_right_edge =
+          GET_MV_SUBPEL((inter_pred_params->mi_cols - mi_col) * MI_SIZE -
+                        inter_pred_params->block_width);
+
+      const SubpelMvLimits mv_limits = {
+        mb_to_left_edge * (1 << (1 - ssx)) - spel_left,
+        mb_to_right_edge * (1 << (1 - ssx)) + spel_right,
+        mb_to_top_edge * (1 << (1 - ssy)) - spel_top,
+        mb_to_bottom_edge * (1 << (1 - ssy)) + spel_bottom
+      };
+
+      clamp_mv(&clamped_mv, &mv_limits);
+
+      orig_pos_x += clamped_mv.col;
+      orig_pos_y += clamped_mv.row;
+
+      orig_pos_y = ((orig_pos_y >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale;
+      orig_pos_x = ((orig_pos_x >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale;
+
+#if CONFIG_D071_IMP_MSK_BLD
+      if (inter_pred_params->border_data.enable_bacp) {
+        // Get reference block top left coordinate.
+        subpel_params->x0 = orig_pos_x >> SCALE_SUBPEL_BITS;
+        subpel_params->y0 = orig_pos_y >> SCALE_SUBPEL_BITS;
+        // Get reference block bottom right coordinate.
+        subpel_params->x1 =
+            ((orig_pos_x +
+              (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+             SCALE_SUBPEL_BITS) +
+            scale;
+        subpel_params->y1 =
+            ((orig_pos_y +
+              (inter_pred_params->block_height - 1) * subpel_params->ys) >>
+             SCALE_SUBPEL_BITS) +
+            scale;
+      }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+#else
+      orig_pos_y = clamp(((orig_pos_y >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale,
+                         top, bottom);
+      orig_pos_x = clamp(((orig_pos_x >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale,
+                         left, right);
+#if CONFIG_D071_IMP_MSK_BLD
+      if (inter_pred_params->border_data.enable_bacp) {
+        // Get reference block top left coordinate.
+        subpel_params->x0 = orig_pos_x >> SCALE_SUBPEL_BITS;
+        subpel_params->y0 = orig_pos_y >> SCALE_SUBPEL_BITS;
+        // Get reference block bottom right coordinate.
+        subpel_params->x1 = ((orig_pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >> SCALE_SUBPEL_BITS) + scale;
+        subpel_params->y1 = ((orig_pos_y + (inter_pred_params->block_height - 1) * subpel_params->ys) >> SCALE_SUBPEL_BITS) + scale;
+        //subpel_params->x1 = subpel_params->x0 + inter_pred_params->block_width * scale;
+        //subpel_params->y1 = subpel_params->y0 + inter_pred_params->block_height * scale;
+      }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+#endif
+      *pre = pre_buf->buf0 +
+             (orig_pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+             (orig_pos_x >> SCALE_SUBPEL_BITS);
+    } else {
+      inter_pred_params->conv_params.stride_scale = 1;
+#if CONFIG_D071_IMP_MSK_BLD
+      if (inter_pred_params->border_data.enable_bacp) {
+        // Get reference block top left coordinate.
+        subpel_params->x0 = pos_x >> SCALE_SUBPEL_BITS;
+        subpel_params->y0 = pos_y >> SCALE_SUBPEL_BITS;
+        // Get reference block bottom right coordinate.
+        subpel_params->x1 = subpel_params->x0 + inter_pred_params->block_width;
+        subpel_params->y1 = subpel_params->y0 + inter_pred_params->block_height;
+      }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+      *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+             (pos_x >> SCALE_SUBPEL_BITS);
+    }
+#else
 #if CONFIG_D071_IMP_MSK_BLD
     if (inter_pred_params->border_data.enable_bacp) {
       // Get reference block top left coordinate.
@@ -1377,9 +1549,9 @@
       subpel_params->y1 = subpel_params->y0 + inter_pred_params->block_height;
     }
 #endif  // CONFIG_D071_IMP_MSK_BLD
-
     *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
            (pos_x >> SCALE_SUBPEL_BITS);
+#endif
   } else {
     int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
     int pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index 6e7a524..669563f 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c
@@ -123,7 +123,9 @@
     precision = MV_SUBPEL_NONE;
 #endif
   }
+#if !CONFIG_ADAPTIVE_MVD_TEST2
   if (mv_class > MV_CLASS_0 && is_adaptive_mvd) use_mv_class_offset = 0;
+#endif  // !CONFIG_ADAPTIVE_MVD_TEST2
   if (use_mv_class_offset) {
 #endif  // CONFIG_ADAPTIVE_MVD
     // Integer bits
@@ -333,7 +335,9 @@
       precision = MV_SUBPEL_NONE;
 #endif
     }
+#if !CONFIG_ADAPTIVE_MVD_TEST2
     if (mv_class > MV_CLASS_0 && is_adaptive_mvd) use_mv_class_offset = 0;
+#endif  // !CONFIG_ADAPTIVE_MVD_TEST2
     if (use_mv_class_offset) {
 #endif  // CONFIG_ADAPTIVE_MVD
 
@@ -541,7 +545,9 @@
         if (is_adaptive_mvd && (c != MV_CLASS_0 || d > 0)) {
           pb_mv_precision = MV_PRECISION_ONE_PEL;
         }
+#if !CONFIG_ADAPTIVE_MVD_TEST2
         if (c > MV_CLASS_0 && is_adaptive_mvd) use_mv_class_offset = 0;
+#endif  // !CONFIG_ADAPTIVE_MVD_TEST2
         if (use_mv_class_offset) {
 #endif
 
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index b7f30ab..11b3710 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -302,6 +302,12 @@
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
 
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  cm->mi_params.superres_scale_denominator = cpi->oxcf.superres_cfg.superres_scale_denominator;
+  cm->mi_params.frm_width = cpi->oxcf.frm_dim_cfg.width;
+  cm->mi_params.frm_height = cpi->oxcf.frm_dim_cfg.height;
+#endif
+
   // We need to reallocate the context buffers here in case we need more mis.
   if (av1_alloc_context_buffers(cm, cm->width, cm->height)) {
     aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
@@ -539,11 +545,15 @@
       }
     }
   }
-
+#if CONFIG_2D_SR_ADJUST_DELTA_QP
+  const int is_360p_or_larger = 0;
+  const int is_720p_or_larger = 0;
+#else
   const int is_360p_or_larger =
       AOMMIN(seq->max_frame_width, seq->max_frame_height) >= 360;
   const int is_720p_or_larger =
       AOMMIN(seq->max_frame_width, seq->max_frame_height) >= 720;
+#endif
   if (!is_360p_or_larger) {
     seq->base_y_dc_delta_q = -7;
     seq->base_uv_dc_delta_q = -6;
@@ -675,6 +685,47 @@
   init_buffer_indices(&cpi->force_intpel_info, cm->remapped_ref_idx);
 
   av1_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
+
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+  if (cpi->oxcf.superres_cfg.superres_mode == AOM_SUPERRES_FIXED || cpi->oxcf.superres_cfg.superres_mode == AOM_SUPERRES_QTHRESH) {
+    if (cpi->oxcf.superres_cfg.enable_superres) {
+      const int this_index = 0;
+      cpi->allowed_non1x_superres_scale_denominator[this_index] = cpi->oxcf.superres_cfg.superres_scale_denominator;
+      int w = oxcf->frm_dim_cfg.width;
+      int h = oxcf->frm_dim_cfg.height;
+      av1_calculate_scaled_size(&w, &h, cpi->allowed_non1x_superres_scale_denominator[this_index]);
+      cpi->allowed_non1x_scale_count = 1;
+    } else {
+      cpi->allowed_non1x_scale_count = 0;
+    }
+  }
+  else if(cpi->oxcf.superres_cfg.superres_mode == AOM_SUPERRES_AUTO || cpi->oxcf.superres_cfg.superres_mode == AOM_SUPERRES_RANDOM) {
+    cpi->allowed_non1x_scale_count = 5;
+    int denom[] = {6, 8, 12, 16, 24};
+    for (int this_index = 0; this_index < cpi->allowed_non1x_scale_count; ++this_index) {
+      int w = oxcf->frm_dim_cfg.width;
+      int h = oxcf->frm_dim_cfg.height;
+      cpi->allowed_non1x_superres_scale_denominator[this_index] = denom[this_index];
+      av1_calculate_scaled_size(&w, &h, cpi->allowed_non1x_superres_scale_denominator[this_index]);
+    }
+  }
+  else {
+      cpi->allowed_non1x_scale_count = 0;
+  }
+#else
+  if (cpi->oxcf.superres_cfg.enable_superres) {
+    const int this_index = 0;
+    cpi->allowed_non1x_superres_scale_denominator[this_index] = cpi->oxcf.superres_cfg.superres_scale_denominator;
+    int w = oxcf->frm_dim_cfg.width;
+    int h = oxcf->frm_dim_cfg.height;
+    av1_calculate_scaled_size(&w, &h, cpi->allowed_non1x_superres_scale_denominator[this_index]);
+    cpi->allowed_non1x_scale_count = 1;
+  } else {
+    cpi->allowed_non1x_scale_count = 0;
+  }
+#endif  // CONFIG_2D_SR_FRAME_WISE_SWITCHING
+#endif  // CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
 }
 
 int aom_strcmp(const char *a, const char *b) {
@@ -1022,11 +1073,18 @@
 #if CONFIG_TIP
 static INLINE void init_tip_ref_frame(AV1_COMMON *const cm) {
   cm->tip_ref.tip_frame = aom_calloc(1, sizeof(*cm->tip_ref.tip_frame));
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  memset(&cm->tip_ref.upscaled_tip_frame_buf, 0,
+         sizeof(cm->tip_ref.upscaled_tip_frame_buf));
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 }
 
 static INLINE void free_tip_ref_frame(AV1_COMMON *const cm) {
   aom_free_frame_buffer(&cm->tip_ref.tip_frame->buf);
   aom_free(cm->tip_ref.tip_frame);
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  aom_free_frame_buffer(&cm->tip_ref.upscaled_tip_frame_buf);
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 }
 
 #if CONFIG_OPTFLOW_ON_TIP
@@ -1125,6 +1183,15 @@
   cm->current_frame.frame_number = 0;
   cm->current_frame.key_frame_number = 0;
   cm->current_frame_id = -1;
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+  if (oxcf->superres_cfg.enable_superres) {
+    cm->superres_scale_denominator = oxcf->superres_cfg.superres_scale_denominator;
+    cm->superres_scale_numerator = SCALE_NUMERATOR;
+  } else {
+    cm->superres_scale_denominator = SCALE_NUMERATOR;
+    cm->superres_scale_numerator = SCALE_NUMERATOR;
+  }
+#endif
   cpi->seq_params_locked = 0;
   cpi->partition_search_skippable_frame = 0;
   cpi->tile_data = NULL;
@@ -1622,6 +1689,7 @@
 static void set_mv_search_params(AV1_COMP *cpi) {
   const AV1_COMMON *const cm = &cpi->common;
   MotionVectorSearchParams *const mv_search_params = &cpi->mv_search_params;
+
   const int max_mv_def = AOMMAX(cm->width, cm->height);
 
   // Default based on max resolution.
@@ -1919,6 +1987,7 @@
   if (cm->seq_params.force_screen_content_tools != 2) {
     features->allow_screen_content_tools = features->allow_intrabc =
         cm->seq_params.force_screen_content_tools;
+
     return;
   }
 
@@ -2062,7 +2131,7 @@
 #endif  // !COUPLED_CHROMA_FROM_LUMA_RESTORATION
 
   if (width * height > 352 * 288)
-    rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX;
+    rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX;
   else
     rst[0].restoration_unit_size = (RESTORATION_UNITSIZE_MAX >> 1);
   rst[1].restoration_unit_size = rst[0].restoration_unit_size >> s;
@@ -2158,6 +2227,21 @@
   }
 
   tip_frame->frame_type = INTER_FRAME;
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+  if (cm->tip_ref.upscaled_tip_frame_buf.y_crop_width !=
+          cm->superres_upscaled_width ||
+      cm->tip_ref.upscaled_tip_frame_buf.y_crop_height !=
+          cm->superres_upscaled_height) {
+    if (aom_realloc_frame_buffer(
+            &cm->tip_ref.upscaled_tip_frame_buf, cm->superres_upscaled_width,
+            cm->superres_upscaled_height, cm->seq_params.subsampling_x,
+            cm->seq_params.subsampling_y, cpi->oxcf.border_in_pixels,
+            cm->features.byte_alignment, NULL, NULL, NULL)) {
+      aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+                         "Failed to allocate frame buffer");
+    }
+  }
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
 }
 #endif  // CONFIG_TIP
 
@@ -2210,7 +2294,12 @@
   const int frame_height = cm->superres_upscaled_height;
   set_restoration_unit_size(frame_width, frame_height,
                             seq_params->subsampling_x,
+#if CONFIG_2D_SR_RESTORATION_FLEXIBLE_RU_SIZE_SCALE
+                            seq_params->subsampling_y, cm->rst_info, cm->superres_scale_denominator);
+#else
                             seq_params->subsampling_y, cm->rst_info);
+#endif
+
   for (int i = 0; i < num_planes; ++i)
     cm->rst_info[i].frame_restoration_type = RESTORE_NONE;
 #if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
@@ -2332,7 +2421,11 @@
 
   MultiThreadInfo *const mt_info = &cpi->mt_info;
   const int num_workers = mt_info->num_workers;
+#if CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR
+  if (use_restoration && !av1_superres_scaled(cm))
+#else
   if (use_restoration)
+#endif  // CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR
     av1_loop_restoration_save_boundary_lines(&cm->cur_frame->buf, cm, 0);
 
   if (use_cdef) {
@@ -2468,6 +2561,7 @@
 #if CONFIG_COLLECT_COMPONENT_TIMING
   end_timing(cpi, loop_restoration_time);
 #endif
+
 }
 
 /*!\brief Select and apply in-loop deblocking filters, cdef filters, and
@@ -2585,9 +2679,12 @@
 #if CONFIG_COLLECT_COMPONENT_TIMING
   printf("\n Encoding a frame:");
 #endif
-
+
   aom_clear_system_state();
-
+#if 0
+printf("invoking in encode_without_recode() - 1\n");
+printf("unscaled->y_crop_width=%d, unscaled->y_crop_height=%d, cm->width=%d, cm->height=%d\n", unscaled->y_crop_width, unscaled->y_crop_height, cm->width, cm->height);
+#endif
   cpi->source = av1_scale_if_required(cm, unscaled, &cpi->scaled_source,
                                       filter_scaler, phase_scaler, true, false);
   if (frame_is_intra_only(cm) || resize_pending != 0) {
@@ -2597,6 +2694,9 @@
   }
 
   if (cpi->unscaled_last_source != NULL) {
+#if 0
+printf("invoking in encode_without_recode() - 2\n");
+#endif
     cpi->last_source = av1_scale_if_required(
         cm, cpi->unscaled_last_source, &cpi->scaled_last_source, filter_scaler,
         phase_scaler, true, false);
@@ -2629,7 +2729,11 @@
   // (zero_mode is forced), and since the scaled references are only
   // use for newmv search, we can avoid scaling here.
   if (!frame_is_intra_only(cm))
+#if CONFIG_2D_SR
+    av1_scale_references(cpi, filter_scaler, phase_scaler, 0);
+#else
     av1_scale_references(cpi, filter_scaler, phase_scaler, 1);
+#endif
 
   av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
                     q_cfg->enable_chroma_deltaq);
@@ -2740,10 +2844,6 @@
 
   if (cm->current_frame.frame_type == KEY_FRAME) copy_frame_prob_info(cpi);
 
-#if CONFIG_COLLECT_COMPONENT_TIMING
-  printf("\n Encoding a frame:");
-#endif
-
   // Determine whether to use screen content tools using two fast encoding.
   av1_determine_sc_tools_with_encoding(cpi, q);
 
@@ -2788,11 +2888,17 @@
         gm_info->search_done = 0;
       }
     }
+#if 0
+printf("invoking in encode_with_recode_loop() - 1\n");
+#endif
     cpi->source =
         av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source,
                               EIGHTTAP_REGULAR, 0, false, false);
 
     if (cpi->unscaled_last_source != NULL) {
+#if 0
+printf("invoking in encode_with_recode_loop() - 2\n");
+#endif
       cpi->last_source = av1_scale_if_required(
           cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
           EIGHTTAP_REGULAR, 0, false, false);
@@ -2956,8 +3062,10 @@
 #if CONFIG_TIP
 static INLINE bool allow_tip_direct_output(AV1_COMMON *const cm) {
   if (!frame_is_intra_only(cm) && !encode_show_existing_frame(cm) &&
-      cm->seq_params.enable_tip == 1 && cm->features.tip_frame_mode &&
-      !av1_superres_scaled(cm)) {
+#if !CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+      !av1_superres_scaled(cm) &&
+#endif  // !CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+      cm->seq_params.enable_tip == 1 && cm->features.tip_frame_mode) {
     return true;
   }
 
@@ -2987,8 +3095,13 @@
 #endif  // CONFIG_PEF
 
     // Compute sse and rate.
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+    YV12_BUFFER_CONFIG *tip_frame_buf =
+        !av1_superres_scaled(cm) ? &cm->tip_ref.tip_frame->buf
+                                 : &cm->tip_ref.upscaled_tip_frame_buf;
+#else
     YV12_BUFFER_CONFIG *tip_frame_buf = &cm->tip_ref.tip_frame->buf;
-
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
     *sse = aom_highbd_get_y_sse(cpi->source, tip_frame_buf);
 
     const int64_t bits = (*size << 3);
@@ -3024,12 +3137,26 @@
       rdmult, tip_as_ref_rate, tip_as_ref_sse, cm->seq_params.bit_depth);
   const double tip_direct_output_rdcost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
       rdmult, tip_as_output_rate, tip_as_output_sse, cm->seq_params.bit_depth);
+
+
   if (tip_direct_output_rdcost < normal_coding_rdcost) {
     cm->features.tip_frame_mode = TIP_FRAME_AS_OUTPUT;
     const int num_planes = av1_num_planes(cm);
     av1_copy_tip_frame_tmvp_mvs(cm);
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+    YV12_BUFFER_CONFIG *tip_frame_buf =
+        !av1_superres_scaled(cm) ? &cm->tip_ref.tip_frame->buf
+                                 : &cm->tip_ref.upscaled_tip_frame_buf;
+#else
+    YV12_BUFFER_CONFIG *tip_frame_buf = &cm->tip_ref.tip_frame->buf;
+#endif  // CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES
+#if CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES_FIX
+    aom_yv12_copy_frame(tip_frame_buf, &cm->cur_frame->buf,
+                        num_planes);
+#else
     aom_yv12_copy_frame(&cm->tip_ref.tip_frame->buf, &cm->cur_frame->buf,
                         num_planes);
+#endif
 
     cm->lf.filter_level[0] = 0;
     cm->lf.filter_level[1] = 0;
@@ -3123,6 +3250,23 @@
   }
 
   AV1_COMMON *const cm = &cpi->common;
+#if CONFIG_2D_SR_RESTORATION_BUG_FIX_ON_STRIDE
+  if (cpi->oxcf.superres_cfg.enable_superres) {
+    for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
+      int w = cm->mi_params.mi_cols << MI_SIZE_LOG2;
+      // int h = cm->mi_params.mi_rows << MI_SIZE_LOG2;
+      w = ((w + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
+      // h = ((h + MAX_SB_SIZE - 1) >> MAX_SB_SIZE_LOG2) << MAX_SB_SIZE_LOG2;
+      w >>= ((plane == 0) ? 0 : cm->seq_params.subsampling_x);
+      // h >>= ((plane == 0) ? 0 : cm->seq_params.subsampling_y);
+      int stride = (w + MIN_TX_SIZE - 1) >> MIN_TX_SIZE_LOG2;
+      // int rows = (h + MIN_TX_SIZE - 1) >> MIN_TX_SIZE_LOG2;
+      // cm->mi_params.tx_skip_buf_size[plane] = rows * stride;
+      cm->mi_params.tx_skip_stride[plane] = stride;
+      cm->mi_params.wiener_class_id_stride[plane] = stride;
+    }
+  }
+#endif  // CONFIG_2D_SR_RESTORATION_BUG_FIX_ON_STRIDE
   SequenceHeader *const seq_params = &cm->seq_params;
 
   // Special case code to reduce pulsing when key frames are forced at a
@@ -3151,7 +3295,7 @@
     cm->seq_params.lr_tools_disable_mask[0],
     cm->seq_params.lr_tools_disable_mask[1]
   };
-#if CONFIG_PC_WIENER
+#if CONFIG_PC_WIENER && !CONFIG_2D_SR_PC_WIENER_ENABLE_FOR_SR
   if (av1_superres_scaled(cm)) {
     master_lr_tools_disable_mask[0] |= (1 << RESTORE_PC_WIENER);
     master_lr_tools_disable_mask[1] |= (1 << RESTORE_PC_WIENER);
@@ -3197,6 +3341,7 @@
   // aom_extend_frame_inner_borders(&cm->cur_frame->buf, av1_num_planes(cm));
   aom_extend_frame_borders(&cm->cur_frame->buf, av1_num_planes(cm));
 
+
 #ifdef OUTPUT_YUV_REC
   aom_write_one_yuv_frame(cm, &cm->cur_frame->buf);
 #endif
@@ -3247,58 +3392,162 @@
   int64_t rate2 = INT64_MAX;
   int largest_tile_id2;
   double proj_rdcost1 = DBL_MAX;
+  int best_denom = -1;
+
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING_SSE_MATCHING
+  FrameProbInfo *const frame_probs = &cpi->frame_probs;
+  const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+  int warped_probs_tmp = frame_probs->warped_probs[update_type];
+#endif
+
+  // Get base_qindex and rdmult first
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+  int64_t rdmult = av1_compute_rd_mult_based_on_qindex(cpi, cm->quant_params.base_qindex);
+#else
+  int top_index = 0, bottom_index = 0, q = 0;
+  q = av1_rc_pick_q_and_bounds(cpi, &cpi->rc, cm->width, cm->height,
+                               cpi->gf_group.index, &bottom_index, &top_index);
+  int64_t rdmult = av1_compute_rd_mult_based_on_qindex(cpi, q);
+#endif
 
   // Encode with superres.
   if (cpi->sf.hl_sf.superres_auto_search_type == SUPERRES_AUTO_ALL) {
     SuperResCfg *const superres_cfg = &cpi->oxcf.superres_cfg;
+#if CONFIG_2D_SR_SCALE_EXT
+    int64_t superres_sses[3 * SCALE_NUMERATOR];
+    int64_t superres_rates[3 * SCALE_NUMERATOR];
+    double superres_rds[3 * SCALE_NUMERATOR];
+    int superres_largest_tile_ids[3 * SCALE_NUMERATOR];
+#else
     int64_t superres_sses[SCALE_NUMERATOR];
     int64_t superres_rates[SCALE_NUMERATOR];
+    double superres_rds[SCALE_NUMERATOR];
     int superres_largest_tile_ids[SCALE_NUMERATOR];
+#endif
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+    int min_scale = 0;
+    int max_scale = 4;
+#endif
+#if CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x
+    int screen_content_tools_org = cpi->common.features.allow_screen_content_tools;
+#endif
+#if CONFIG_2D_SR_TILE_CONFIG
+    unsigned int tile_columns_org = cpi->oxcf.tile_cfg.tile_columns;
+    unsigned int tile_rows_org = cpi->oxcf.tile_cfg.tile_rows;
+#endif  // CONFIG_2D_SR_TILE_CONFIG
     // Use superres for Key-frames and Alt-ref frames only.
     const GF_GROUP *const gf_group = &cpi->gf_group;
     if (gf_group->update_type[gf_group->index] != OVERLAY_UPDATE &&
         gf_group->update_type[gf_group->index] != INTNL_OVERLAY_UPDATE) {
+#if CONFIG_2D_SR
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+    for (int this_index = min_scale; this_index <= max_scale; this_index++) {
+#else
+    for (int this_index = 0; this_index < SUPERRES_SCALES; ++this_index) {
+#endif
+        const int denom = superres_scales[this_index].scale_denom;
+#else   // CONFIG_2D_SR
       for (int denom = SCALE_NUMERATOR + 1; denom <= 2 * SCALE_NUMERATOR;
            ++denom) {
+        const int this_index = denom - (SCALE_NUMERATOR + 1);
+#endif  // CONFIG_2D_SR
         superres_cfg->superres_scale_denominator = denom;
         superres_cfg->superres_kf_scale_denominator = denom;
-        const int this_index = denom - (SCALE_NUMERATOR + 1);
 
         cpi->superres_mode = AOM_SUPERRES_AUTO;  // Super-res on for this loop.
+
+#if CONFIG_2D_SR_TILE_CONFIG
+        if (cpi->superres_mode == AOM_SUPERRES_AUTO &&
+            (superres_cfg->superres_scale_denominator != SCALE_NUMERATOR ||
+             superres_cfg->superres_kf_scale_denominator != SCALE_NUMERATOR)) {
+          cpi->oxcf.tile_cfg.tile_columns = 0;
+          cpi->oxcf.tile_cfg.tile_rows = 0;
+        }
+#endif  // CONFIG_2D_SR_TILE_CONFIG
+
+#if CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x
+        // Override allow_screen_content_tools setting
+        if (cpi->superres_mode == AOM_SUPERRES_AUTO && superres_cfg->superres_scale_denominator!= SCALE_NUMERATOR) {
+          cpi->common.features.allow_screen_content_tools = 0;
+        }
+#endif
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING_SSE_MATCHING
+        frame_probs->warped_probs[update_type] = warped_probs_tmp;
+#endif  // CONFIG_2D_SR_FRAME_WISE_SWITCHING_SSE_MATCHING
         err = encode_with_recode_loop_and_filter(
             cpi, size, dest, &superres_sses[this_index],
             &superres_rates[this_index],
             &superres_largest_tile_ids[this_index]);
         cpi->superres_mode = AOM_SUPERRES_NONE;  // Reset to default (full-res).
         if (err != AOM_CODEC_OK) return err;
+#if !CONFIG_2D_SR_FRAME_WISE_SWITCHING
+        superres_rds[this_index] = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+            rdmult, superres_rates[this_index], superres_sses[this_index],
+            cm->seq_params.bit_depth);
         restore_all_coding_context(cpi);
+        if (superres_rds[this_index] <= proj_rdcost1) {
+          sse1 = superres_sses[this_index];
+          rate1 = superres_rates[this_index];
+          largest_tile_id1 = superres_largest_tile_ids[this_index];
+          proj_rdcost1 = superres_rds[this_index];
+          best_denom = denom;
+        } else {
+          break;  // if the cost starts going up, terminate the search
+        }
+#endif
       }
       // Reset.
       superres_cfg->superres_scale_denominator = SCALE_NUMERATOR;
       superres_cfg->superres_kf_scale_denominator = SCALE_NUMERATOR;
+#if CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x
+      cpi->common.features.allow_screen_content_tools = screen_content_tools_org;
+#endif  // CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x
+#if CONFIG_2D_SR_TILE_CONFIG
+      cpi->oxcf.tile_cfg.tile_columns = tile_columns_org;
+      cpi->oxcf.tile_cfg.tile_rows = tile_rows_org;
+#endif  // CONFIG_2D_SR_TILE_CONFIG
     } else {
+#if CONFIG_2D_SR
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+    for (int this_index = min_scale; this_index <= max_scale; this_index++) {
+#else
+    for (int this_index = 0; this_index < SUPERRES_SCALES; ++this_index) {
+#endif
+#else   // CONFIG_2D_SR
       for (int denom = SCALE_NUMERATOR + 1; denom <= 2 * SCALE_NUMERATOR;
            ++denom) {
         const int this_index = denom - (SCALE_NUMERATOR + 1);
+#endif  // CONFIG_2D_SR
         superres_sses[this_index] = INT64_MAX;
         superres_rates[this_index] = INT64_MAX;
       }
     }
     // Encode without superres.
     assert(cpi->superres_mode == AOM_SUPERRES_NONE);
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING_SSE_MATCHING
+    frame_probs->warped_probs[update_type] = warped_probs_tmp;
+#endif  // CONFIG_2D_SR_FRAME_WISE_SWITCHING_SSE_MATCHING
     err = encode_with_recode_loop_and_filter(cpi, size, dest, &sse2, &rate2,
                                              &largest_tile_id2);
     if (err != AOM_CODEC_OK) return err;
 
     // Note: Both use common rdmult based on base qindex of fullres.
-    const int64_t rdmult =
-        av1_compute_rd_mult_based_on_qindex(cpi, cm->quant_params.base_qindex);
+    rdmult = av1_compute_rd_mult_based_on_qindex(cpi, cm->quant_params.base_qindex);
 
     // Find the best rdcost among all superres denoms.
-    int best_denom = -1;
+    best_denom = -1;
+#if CONFIG_2D_SR
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+    for (int this_index = min_scale; this_index <= max_scale; this_index++) {
+#else
+    for (int this_index = 0; this_index < SUPERRES_SCALES; ++this_index) {
+#endif
+    const int denom = superres_scales[this_index].scale_denom;
+#else   // CONFIG_2D_SR
     for (int denom = SCALE_NUMERATOR + 1; denom <= 2 * SCALE_NUMERATOR;
          ++denom) {
       const int this_index = denom - (SCALE_NUMERATOR + 1);
+#endif
       const int64_t this_sse = superres_sses[this_index];
       const int64_t this_rate = superres_rates[this_index];
       const int this_largest_tile_id = superres_largest_tile_ids[this_index];
@@ -3323,10 +3572,22 @@
       // Again, temporarily force the best denom.
       superres_cfg->superres_scale_denominator = best_denom;
       superres_cfg->superres_kf_scale_denominator = best_denom;
+#if CONFIG_2D_SR_TILE_CONFIG
+      cpi->oxcf.tile_cfg.tile_columns = 0;
+      cpi->oxcf.tile_cfg.tile_rows = 0;
+#endif  // CONFIG_2D_SR_TILE_CONFIG
       int64_t sse3 = INT64_MAX;
       int64_t rate3 = INT64_MAX;
       cpi->superres_mode =
           AOM_SUPERRES_AUTO;  // Super-res on for this recode loop.
+#if CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x
+    // Override allow_screen_content_tools setting
+    if (cpi->superres_mode == AOM_SUPERRES_AUTO && superres_cfg->superres_scale_denominator != SCALE_NUMERATOR)
+      cpi->common.features.allow_screen_content_tools = 0;
+#endif
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING_SSE_MATCHING
+      frame_probs->warped_probs[update_type] = warped_probs_tmp;
+#endif  // CONFIG_2D_SR_FRAME_WISE_SWITCHING_SSE_MATCHING
       err = encode_with_recode_loop_and_filter(cpi, size, dest, &sse3, &rate3,
                                                largest_tile_id);
       cpi->superres_mode = AOM_SUPERRES_NONE;  // Reset to default (full-res).
@@ -3336,8 +3597,19 @@
       // Reset.
       superres_cfg->superres_scale_denominator = SCALE_NUMERATOR;
       superres_cfg->superres_kf_scale_denominator = SCALE_NUMERATOR;
+#if CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x
+       cpi->common.features.allow_screen_content_tools = screen_content_tools_org;
+#endif
+#if CONFIG_2D_SR_TILE_CONFIG
+      cpi->oxcf.tile_cfg.tile_columns = tile_columns_org;
+      cpi->oxcf.tile_cfg.tile_rows = tile_rows_org;
+#endif  // CONFIG_2D_SR_TILE_CONFIG      
     } else {
       *largest_tile_id = largest_tile_id2;
+#if CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x
+      cpi->common.features.allow_screen_content_tools = screen_content_tools_org;
+#endif      
+
     }
   } else {
     assert(cpi->sf.hl_sf.superres_auto_search_type == SUPERRES_AUTO_DUAL);
@@ -3355,8 +3627,10 @@
     if (err != AOM_CODEC_OK) return err;
 
     // Note: Both use common rdmult based on base qindex of fullres.
+    /*
     const int64_t rdmult =
         av1_compute_rd_mult_based_on_qindex(cpi, cm->quant_params.base_qindex);
+        */
     proj_rdcost1 = RDCOST_DBL_WITH_NATIVE_BD_DIST(rdmult, rate1, sse1,
                                                   cm->seq_params.bit_depth);
     const double proj_rdcost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
@@ -3912,6 +4186,7 @@
   return res;
 }
 
+
 #if CONFIG_INTERNAL_STATS
 extern double av1_get_blockiness(const unsigned char *img1, int img1_pitch,
                                  const unsigned char *img2, int img2_pitch,
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index e533bdc..cb4a1e5 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -2424,6 +2424,11 @@
    */
   struct lookahead_ctx *lookahead;
 
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+  uint8_t allowed_non1x_superres_scale_denominator[SUPERRES_SCALES];
+  int allowed_non1x_scale_count;
+#endif
+
   /*!
    * When set, this flag indicates that the current frame is a forward keyframe.
    */
@@ -2494,7 +2499,11 @@
    * Pointer to the buffer holding the scaled reference frames.
    * scaled_ref_buf[i] holds the scaled reference frame of type i.
    */
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+  RefCntBuffer *scaled_ref_buf[INTER_REFS_PER_FRAME * (SUPERRES_SCALES + 1)];
+#else
   RefCntBuffer *scaled_ref_buf[INTER_REFS_PER_FRAME];
+#endif
 
   /*!
    * Pointer to the buffer holding the last show frame.
@@ -2958,6 +2967,7 @@
    * found in the frame update type with enum value equal to i
    */
   int valid_gm_model_found[FRAME_UPDATE_TYPES];
+
 } AV1_COMP;
 
 /*!
@@ -3067,7 +3077,6 @@
 int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
                           int64_t end_time_stamp);
-
 /*!\brief Encode a frame
  *
  * \ingroup high_level_algo
@@ -3586,6 +3595,26 @@
 }
 #endif
 
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+// Maps a non-1x superres scale denominator to its index in the encoder's
+// allowed_non1x_superres_scale_denominator[] table. Aborts on mismatch.
+static INLINE int to_scale_index(const AV1_COMP *cpi, uint8_t denom) {
+  for (int this_index = 0; this_index < cpi->allowed_non1x_scale_count;
+       ++this_index) {
+    if (cpi->allowed_non1x_superres_scale_denominator[this_index] == denom) {
+      return this_index;
+    }
+  }
+
+  // No allowed scale matched: internal inconsistency. Report on stderr and
+  // exit with a failure status (exit(0) would incorrectly signal success).
+  fprintf(stderr, "to_scale_index(): did not match denom=%d\n", denom);
+  exit(1);
+
+  return -1;  // Unreachable; satisfies compilers requiring a return value.
+}
+#endif
+
 /*!\endcond */
 
 #ifdef __cplusplus
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index d40a3a8..52849a5 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -56,6 +56,12 @@
   AV1_COMMON *cm = &cpi->common;
   TokenInfo *token_info = &cpi->token_info;
 
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  cm->mi_params.superres_scale_denominator = cpi->oxcf.superres_cfg.superres_scale_denominator;
+  cm->mi_params.frm_width = cpi->oxcf.frm_dim_cfg.width;
+  cm->mi_params.frm_height = cpi->oxcf.frm_dim_cfg.height;
+#endif
+
   if (av1_alloc_context_buffers(cm, cm->width, cm->height)) {
     aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
                        "Failed to allocate context buffers");
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index d8c6bee..7f5b092 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -497,15 +497,23 @@
   const int num_planes = av1_num_planes(cm);
   MV_REFERENCE_FRAME ref_frame;
 
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+  const uint8_t denom = cpi->oxcf.superres_cfg.superres_scale_denominator;
+  const int scale_index = (denom == SCALE_NUMERATOR) ? SUPERRES_SCALES : to_scale_index(cpi, denom);
+#endif
+
   for (ref_frame = 0; ref_frame < INTER_REFS_PER_FRAME; ++ref_frame) {
     // Need to convert from AOM_REFFRAME to index into ref_mask (subtract 1).
     if (cm->ref_frame_flags & (1 << ref_frame)) {
       BufferPool *const pool = cm->buffer_pool;
       const YV12_BUFFER_CONFIG *const ref =
           get_ref_frame_yv12_buf(cm, ref_frame);
-
       if (ref == NULL) {
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+        cpi->scaled_ref_buf[ref_frame * (SUPERRES_SCALES + 1) + scale_index] = NULL;
+#else
         cpi->scaled_ref_buf[ref_frame] = NULL;
+#endif
         continue;
       }
 
@@ -525,7 +533,11 @@
           }
         }
         int force_scaling = 0;
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+        RefCntBuffer *new_fb = cpi->scaled_ref_buf[ref_frame * (SUPERRES_SCALES + 1) + scale_index];
+#else
         RefCntBuffer *new_fb = cpi->scaled_ref_buf[ref_frame];
+#endif
         if (new_fb == NULL) {
           const int new_fb_idx = get_free_fb(cm);
           if (new_fb_idx == INVALID_IDX) {
@@ -550,24 +562,100 @@
             aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
                                "Failed to allocate frame buffer");
           }
+#if CONFIG_2D_SR
+          if (cm->superres_scale_denominator > cm->superres_scale_numerator) {
+#if CONFIG_2D_SR_ZERO_PHASE
+            if ((cm->superres_scale_denominator != 6 ) &&  // 1.5x
+                (cm->superres_scale_denominator != 8 ) &&  // 2x
+                (cm->superres_scale_denominator != 12) &&  // 3x
+                (cm->superres_scale_denominator != 16) &&  // 4x
+                (cm->superres_scale_denominator != 24)) {  // 6x
+              printf("av1_scale_references(): Unsupported scaling factor\n");
+              exit(0);
+            }
+
+            if ((cm->superres_scale_denominator == 8 ) || // 2x
+                (cm->superres_scale_denominator == 12) || // 3x
+                (cm->superres_scale_denominator == 16) || // 4x
+                (cm->superres_scale_denominator == 24)) { // 6x
+              int scale = 0;
+              if (cm->superres_scale_denominator == 8 ) scale = 2;  // 2x
+              if (cm->superres_scale_denominator == 12) scale = 3; // 3x
+              if (cm->superres_scale_denominator == 16) scale = 4; // 4x
+              if (cm->superres_scale_denominator == 24) scale = 6; // 6x
+              assert(scale != 0);
+
+              const int num_planes = av1_num_planes(cm);
+              YV12_BUFFER_CONFIG *fb = &(new_fb->buf);
+              for (int i = 0; i < num_planes; ++i) {
+                const int is_uv = (i > 0) ? 1 : 0;
+
+                uint16_t *src = ref->buffers[i];
+                int src_stride = ref->strides[is_uv];
+                uint16_t *dst = fb->buffers[i];
+                int dst_stride = fb->strides[is_uv];
+                
+
+                for (int r = 0; r < fb->crop_heights[is_uv]; r++) {
+                  uint16_t *dst0 = dst;
+                  uint16_t *src0 = src;
+                  for (int c = 0; c < fb->crop_widths[is_uv]; c++) {
+                    (*dst) = (*src);
+                    dst++;
+                    src += scale;
+                  }
+                  dst = dst0 + dst_stride;
+                  src = src0 + scale * src_stride;
+                }
+              }
+              aom_extend_frame_borders(fb, num_planes);
+            } else {  // 1.5x
+              av1_resize_lanczos_and_extend_frame(
+                  ref, &new_fb->buf, (int)cm->seq_params.bit_depth, num_planes,
+                  cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
+                  cm->superres_scale_denominator, cm->superres_scale_numerator);
+            }
+#else
+            av1_resize_lanczos_and_extend_frame(
+                ref, &new_fb->buf, (int)cm->seq_params.bit_depth, num_planes,
+                cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
+                cm->superres_scale_denominator, cm->superres_scale_numerator);
+#endif
+          } else {
+#endif  // CONFIG_2D_SR
           if (use_optimized_scaler && cm->seq_params.bit_depth == AOM_BITS_8)
             av1_resize_and_extend_frame(ref, &new_fb->buf, filter, phase,
                                         num_planes);
           else
             av1_resize_and_extend_frame_nonnormative(
                 ref, &new_fb->buf, (int)cm->seq_params.bit_depth, num_planes);
+#if CONFIG_2D_SR
+          }
+#endif  // CONFIG_2D_SR
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+          cpi->scaled_ref_buf[ref_frame * (SUPERRES_SCALES + 1) + scale_index] = new_fb;
+#else
           cpi->scaled_ref_buf[ref_frame] = new_fb;
+#endif
           alloc_frame_mvs(cm, new_fb);
         }
       } else {
         RefCntBuffer *buf = get_ref_frame_buf(cm, ref_frame);
         buf->buf.y_crop_width = ref->y_crop_width;
         buf->buf.y_crop_height = ref->y_crop_height;
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+        cpi->scaled_ref_buf[ref_frame * (SUPERRES_SCALES + 1) + scale_index] = buf;
+#else
         cpi->scaled_ref_buf[ref_frame] = buf;
+#endif
         ++buf->ref_count;
       }
     } else {
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+      if (!has_no_stats_stage(cpi)) cpi->scaled_ref_buf[ref_frame * (SUPERRES_SCALES + 1) + scale_index] = NULL;
+#else
       if (!has_no_stats_stage(cpi)) cpi->scaled_ref_buf[ref_frame] = NULL;
+#endif
     }
   }
 }
diff --git a/av1/encoder/encoder_utils.h b/av1/encoder/encoder_utils.h
index 612bde5..0fbe755 100644
--- a/av1/encoder/encoder_utils.h
+++ b/av1/encoder/encoder_utils.h
@@ -51,6 +51,8 @@
 
 static AOM_INLINE void set_mb_mi(CommonModeInfoParams *mi_params, int width,
                                  int height) {
+
+
   // Ensure that the decoded width and height are both multiples of
   // 8 luma pixels (note: this may only be a multiple of 4 chroma pixels if
   // subsampling is used).
@@ -62,7 +64,6 @@
   mi_params->mi_cols = aligned_width >> MI_SIZE_LOG2;
   mi_params->mi_rows = aligned_height >> MI_SIZE_LOG2;
   mi_params->mi_stride = calc_mi_size(mi_params->mi_cols);
-
   mi_params->mb_cols = (mi_params->mi_cols + 2) >> 2;
   mi_params->mb_rows = (mi_params->mi_rows + 2) >> 2;
   mi_params->MBs = mi_params->mb_rows * mi_params->mb_cols;
@@ -105,7 +106,9 @@
 
 static AOM_INLINE void enc_set_mb_mi(CommonModeInfoParams *mi_params, int width,
                                      int height) {
+
   const int is_4k_or_larger = AOMMIN(width, height) >= 2160;
+
   mi_params->mi_alloc_bsize = is_4k_or_larger ? BLOCK_8X8 : BLOCK_4X4;
 
   set_mb_mi(mi_params, width, height);
@@ -902,7 +905,11 @@
 
 static AOM_INLINE void release_scaled_references(AV1_COMP *cpi) {
   // TODO(isbs): only refresh the necessary frames, rather than all of them
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+  for (int i = 0; i < INTER_REFS_PER_FRAME * (SUPERRES_SCALES + 1); ++i) {
+#else
   for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+#endif
     RefCntBuffer *const buf = cpi->scaled_ref_buf[i];
     if (buf != NULL) {
       --buf->ref_count;
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 388a753..863d10e 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -909,7 +909,7 @@
   const uint8_t *entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset;
   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#if CONFIG_PC_WIENER
+#if CONFIG_PC_WIENER && !CONFIG_2D_SR_SET_TX_SKIP_ZERO
   if (!is_global_intrabc_allowed(cm) && !cm->features.coded_lossless) {
     // Assert only when LR is enabled.
     assert((eob == 0) == av1_get_txk_skip(cm, xd->mi_row, xd->mi_col, plane,
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index f606924..bfa49ae 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -1192,6 +1192,7 @@
   TX_SIZE tx_size = max_txsize_lookup[bsize];
   int mi_height = mi_size_high[bsize];
   int num_active_workers = cpi->tpl_data.tpl_mt_sync.num_threads_working;
+
   for (int mi_row = thread_data->start * mi_height; mi_row < mi_params->mi_rows;
        mi_row += num_active_workers * mi_height) {
     // Motion estimation row boundary
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index 9d3f5dc..4375c2d 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -233,7 +233,11 @@
 #if CONFIG_IBC_BV_IMPROVEMENT
       is_ibc_cost,
 #endif
-      first_pass_search_sites, fine_search_interval);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	  first_pass_search_sites, fine_search_interval, 0);
+#else
+	  first_pass_search_sites, fine_search_interval);
+#endif
 #else
   av1_make_default_fullpel_ms_params(&ms_params, cpi, x, bsize, ref_mv,
                                      first_pass_search_sites,
@@ -909,6 +913,7 @@
 static void first_pass_tile(AV1_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data) {
   TileInfo *tile = &tile_data->tile_info;
+
   for (int mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
        mi_row += FP_MIB_SIZE) {
     av1_first_pass_row(cpi, td, tile_data, mi_row >> FP_MIB_SIZE_LOG2);
@@ -958,6 +963,7 @@
   AV1EncRowMultiThreadSync *const row_mt_sync = &tile_data->row_mt_sync;
 
   xd->tile = *tile;
+
   const YV12_BUFFER_CONFIG *const last_frame =
       get_ref_frame_yv12_buf(cm, LAST_FRAME_PROXY);
   const YV12_BUFFER_CONFIG *golden_frame =
@@ -1050,6 +1056,7 @@
           recon_yoffset, recon_uvoffset, src_yoffset, alt_ref_frame_yoffset,
           fp_block_size, this_intra_error, raw_motion_err_counts,
           raw_motion_err_list, &best_ref_mv, &last_mv, mb_stats);
+
       if (mb_col_in_tile == 0) {
         *first_top_mv = last_mv;
       }
@@ -1122,6 +1129,7 @@
   const YV12_BUFFER_CONFIG *golden_frame =
       get_ref_frame_yv12_buf(cm, GOLDEN_FRAME_PROXY);
   YV12_BUFFER_CONFIG *const this_frame = &cm->cur_frame->buf;
+
   // First pass code requires valid last and new frame buffers.
   assert(this_frame != NULL);
   assert(frame_is_intra_only(cm) || (last_frame != NULL));
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
index c3495e1..117a255 100644
--- a/av1/encoder/global_motion.c
+++ b/av1/encoder/global_motion.c
@@ -192,7 +192,11 @@
       const int warp_h = AOMMIN(error_bsize_h, p_row + p_height - i);
       highbd_warp_plane(wm, ref, width, height, stride, tmp, j, i, warp_w,
                         warp_h, WARP_ERROR_BLOCK, subsampling_x, subsampling_y,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                        bd, &conv_params, NULL);
+#else
                         bd, &conv_params);
+#endif 
       gm_sumerr += av1_calc_highbd_frame_error(tmp, WARP_ERROR_BLOCK,
                                                dst + j + i * p_stride, warp_w,
                                                warp_h, p_stride, bd);
diff --git a/av1/encoder/global_motion_facade.c b/av1/encoder/global_motion_facade.c
index 8180bf4..d07e654 100644
--- a/av1/encoder/global_motion_facade.c
+++ b/av1/encoder/global_motion_facade.c
@@ -417,7 +417,11 @@
     cm->global_motion[frame] = default_warp_params;
     RefCntBuffer *buf = get_ref_frame_buf(cm, frame);
     // Skip global motion estimation for invalid ref frames
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+    if (buf == NULL || av1_is_scaled(get_ref_scale_factors(cm, frame)) ||
+#else
     if (buf == NULL ||
+#endif
         (ref_disabled && cpi->sf.hl_sf.recode_loop != DISALLOW_RECODE)) {
       continue;
     } else {
diff --git a/av1/encoder/interp_search.c b/av1/encoder/interp_search.c
index c355ffc..3eb5aa0 100644
--- a/av1/encoder/interp_search.c
+++ b/av1/encoder/interp_search.c
@@ -333,6 +333,7 @@
     const MV mv = mbmi->mv[ref].as_mv;
     int skip_hor_plane = 0;
     int skip_ver_plane = 0;
+
     for (int plane_idx = 0; plane_idx < AOMMAX(1, (num_planes - 1));
          ++plane_idx) {
       struct macroblockd_plane *const pd = &xd->plane[plane_idx];
@@ -344,6 +345,7 @@
                                     0,
 #endif  // CONFIG_OPTFLOW_REFINEMENT
                                     pd->subsampling_x, pd->subsampling_y);
+
       const int sub_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
       const int sub_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
       skip_hor_plane |= ((sub_x == 0) << plane_idx);
diff --git a/av1/encoder/lookahead.c b/av1/encoder/lookahead.c
index 251feaf..cd0f092 100644
--- a/av1/encoder/lookahead.c
+++ b/av1/encoder/lookahead.c
@@ -35,7 +35,6 @@
   if (ctx) {
     if (ctx->buf) {
       int i;
-
       for (i = 0; i < ctx->max_sz; i++) aom_free_frame_buffer(&ctx->buf[i].img);
       free(ctx->buf);
     }
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index dd5ab29..792e1f3 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -83,8 +83,16 @@
 #endif
 }
 
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+static INLINE void init_ms_buffers(MSBuffers *ms_buffers, const MACROBLOCK *x, int ref_idx) {
+#else
 static INLINE void init_ms_buffers(MSBuffers *ms_buffers, const MACROBLOCK *x) {
-  ms_buffers->ref = &x->e_mbd.plane[0].pre[0];
+#endif
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	ms_buffers->ref = &x->e_mbd.plane[0].pre[ref_idx];
+#else
+	ms_buffers->ref = &x->e_mbd.plane[0].pre[0];
+#endif
   ms_buffers->src = &x->plane[0].src;
 
   av1_set_ms_compound_refs(ms_buffers, NULL, NULL, 0, 0);
@@ -124,7 +132,11 @@
 #endif
 #endif
     const search_site_config search_sites[NUM_DISTINCT_SEARCH_METHODS],
-    int fine_search_interval) {
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	int fine_search_interval, int ref_idx) {
+#else
+	int fine_search_interval) {
+#endif
   const MV_SPEED_FEATURES *mv_sf = &cpi->sf.mv_sf;
 
 #if CONFIG_ADAPTIVE_MVD || CONFIG_TIP || CONFIG_FLEX_MVRES
@@ -143,8 +155,11 @@
   // High level params
   ms_params->bsize = bsize;
   ms_params->vfp = &cpi->fn_ptr[bsize];
-
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  init_ms_buffers(&ms_params->ms_buffers, x, ref_idx);
+#else
   init_ms_buffers(&ms_params->ms_buffers, x);
+#endif
 
   SEARCH_METHODS search_method = mv_sf->search_method;
   if (mv_sf->use_bsize_dependent_search_method) {
@@ -231,7 +246,11 @@
 #if CONFIG_FLEX_MVRES
                                        const MvSubpelPrecision pb_mv_precision,
 #endif
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                       const int *cost_list, int ref_idx) {
+#else
                                        const int *cost_list) {
+#endif
 
 #if CONFIG_ADAPTIVE_MVD || !CONFIG_FLEX_MVRES
   const AV1_COMMON *cm = &cpi->common;
@@ -269,7 +288,11 @@
   ms_params->cost_list = cond_cost_list_const(cpi, cost_list);
 
 #if CONFIG_TIP
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  if (is_tip_ref_frame(mbmi->ref_frame[ref_idx])) {
+#else
   if (is_tip_ref_frame(mbmi->ref_frame[0])) {
+#endif
     av1_set_tip_subpel_mv_search_range(&ms_params->mv_limits, &x->mv_limits);
   } else {
 #endif  // CONFIG_TIP
@@ -313,7 +336,11 @@
 
   // Ref and src buffers
   MSBuffers *ms_buffers = &ms_params->var_params.ms_buffers;
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  init_ms_buffers(ms_buffers, x, ref_idx);
+#else
   init_ms_buffers(ms_buffers, x);
+#endif
 #if CONFIG_FLEX_MVRES
   assert(ms_params->var_params.subpel_search_type &&
          "Subpel type 2_TAPS_ORIG is no longer supported!");
@@ -2488,7 +2515,7 @@
 // This function is called when we do joint motion search in comp_inter_inter
 // mode, or when searching for one component of an ext-inter compound mode.
 int av1_refining_search_8p_c(const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
-                             const FULLPEL_MV start_mv, FULLPEL_MV *best_mv) {
+	const FULLPEL_MV start_mv, FULLPEL_MV *best_mv) {
   static const search_neighbors neighbors[8] = {
     { { -1, 0 }, -1 * SEARCH_GRID_STRIDE_8P + 0 },
     { { 0, -1 }, 0 * SEARCH_GRID_STRIDE_8P - 1 },
@@ -2517,10 +2544,11 @@
   const int ref_stride = ref->stride;
 
   *best_mv = start_mv;
+
   clamp_fullmv(best_mv, mv_limits);
 
   unsigned int best_sad = get_mvpred_compound_sad(
-      ms_params, src, get_buf_from_fullmv(ref, best_mv), ref_stride);
+	  ms_params, src, get_buf_from_fullmv(ref, best_mv), ref_stride);
 #if CONFIG_FLEX_MVRES
   best_sad += mvsad_err_cost(*best_mv, mv_cost_params);
 #else
@@ -2582,8 +2610,8 @@
 // comp_inter_inter mode, or when searching for one component of an ext-inter
 // compound mode.
 int av1_refining_search_8p_c_low_precision(
-    const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV start_mv,
-    FULLPEL_MV *best_mv, int fast_mv_refinement) {
+	const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV start_mv,
+	FULLPEL_MV *best_mv, int fast_mv_refinement) {
   assert(ms_params->mv_cost_params.pb_mv_precision < MV_PRECISION_ONE_PEL);
   const int search_range =
       1 << (MV_PRECISION_ONE_PEL - ms_params->mv_cost_params.pb_mv_precision);
@@ -2614,12 +2642,11 @@
   const int ref_stride = ref->stride;
 
   *best_mv = start_mv;
-  clamp_fullmv(best_mv, mv_limits);
 
+  clamp_fullmv(best_mv, mv_limits);
   unsigned int best_sad = get_mvpred_compound_sad(
       ms_params, src, get_buf_from_fullmv(ref, best_mv), ref_stride);
   best_sad += mvsad_err_cost(*best_mv, mv_cost_params);
-
   for (int step = 0; step < num_of_search_steps; step++) {
     int best_site = -1;
     // TODO(Mohammed): remove retundant search points to reduce complexity
@@ -2633,7 +2660,7 @@
         sad = get_mvpred_compound_sad(
             ms_params, src, get_buf_from_fullmv(ref, &mv), ref_stride);
         if (sad < best_sad) {
-          sad += mvsad_err_cost(mv, mv_cost_params);
+			    sad += mvsad_err_cost(mv, mv_cost_params);
           if (sad < best_sad) {
             best_sad = sad;
             best_site = j;
@@ -2649,7 +2676,6 @@
       best_mv->col += neighbors[best_site].coord.col;
     }
   }
-
   return best_sad;
 }
 
@@ -3361,7 +3387,7 @@
 int av1_obmc_full_pixel_search(const FULLPEL_MV start_mv,
                                const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
                                const int step_param, FULLPEL_MV *best_mv) {
-  if (!ms_params->fast_obmc_search) {
+	if (!ms_params->fast_obmc_search) {
     const int do_refine = 1;
     const int bestsme = obmc_full_pixel_diamond(ms_params, start_mv, step_param,
                                                 do_refine, best_mv);
@@ -3388,7 +3414,6 @@
  * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
  * could reduce the area.
  */
-
 // Returns the subpel offset used by various subpel variance functions [m]sv[a]f
 static INLINE int get_subpel_part(int x) { return x & 7; }
 
@@ -3400,7 +3425,6 @@
   const int offset = (mv.row >> 3) * buf->stride + (mv.col >> 3);
   return &buf->buf[offset];
 }
-
 // Estimates the variance of prediction residue using bilinear filter for fast
 // search.
 static INLINE int estimated_pref_error(
@@ -3432,7 +3456,6 @@
                      sse, second_pred);
   }
 }
-
 // Calculates the variance of prediction residue.
 static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *cm,
                                 const MV *this_mv,
@@ -3468,7 +3491,11 @@
       aom_highbd_comp_mask_upsampled_pred(
           xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h, subpel_x_q3,
           subpel_y_q3, ref, ref_stride, mask, mask_stride, invert_mask, xd->bd,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+          subpel_search_type, is_scaled_ref);
+#else
           subpel_search_type);
+#endif          
     } else {
 #if CONFIG_CWP
       if (get_cwp_idx(xd->mi[0]) != CWP_EQUAL) {
@@ -3478,14 +3505,22 @@
         aom_highbd_dist_wtd_comp_avg_upsampled_pred(
             xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h,
             subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd, &jcp_param,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+            subpel_search_type, is_scaled_ref);
+#else
             subpel_search_type);
+#endif
       } else
 #endif  // CONFIG_CWP
 
         aom_highbd_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv,
                                            pred, second_pred, w, h, subpel_x_q3,
                                            subpel_y_q3, ref, ref_stride, xd->bd,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+                                           subpel_search_type, is_scaled_ref);
+#else
                                            subpel_search_type);
+#endif
     }
   } else {
     aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h,
@@ -3876,7 +3911,7 @@
     MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV *bestmv,
     const SUBPEL_SEARCH_VAR_PARAMS *var_params,
     const MV_COST_PARAMS *mv_cost_params, unsigned int *sse1, int *distortion) {
-  unsigned int besterr = upsampled_pref_error(xd, cm, bestmv, var_params, sse1);
+  int besterr = upsampled_pref_error(xd, cm, bestmv, var_params, sse1);
   *distortion = besterr;
 #if CONFIG_FLEX_MVRES
   besterr += mv_err_cost(*bestmv, mv_cost_params);
@@ -4345,7 +4380,6 @@
   int hstep = 8 >> round;  // Step size, initialized to 4/8=1/2 pel
 
   unsigned int besterr = INT_MAX;
-
   *bestmv = start_mv;
 
 #if CONFIG_FLEX_MVRES
@@ -4464,7 +4498,6 @@
     besterr = setup_center_error(xd, bestmv, var_params, mv_cost_params, sse1,
                                  distortion);
   }
-
   MV iter_center_mv = *start_mv;
   const int cand_pos[4][2] = {
     { 0, -1 },  // left
@@ -5848,11 +5881,20 @@
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
 
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+  const int is_scaled_ref = ms_buffers->src->width == ms_buffers->ref->width &&
+                            ms_buffers->src->height == ms_buffers->ref->height;
+#endif
+
   unsigned int besterr;
   DECLARE_ALIGNED(16, uint16_t, pred[MAX_SB_SQUARE]);
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h,
                             subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+                            subpel_search_type, is_scaled_ref);
+#else
                             subpel_search_type, 0);
+#endif                            
   besterr = vfp->ovf(pred, w, wsrc, mask, sse);
 
   return besterr;
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index 2e57f5e..2cacbaa 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -130,7 +130,11 @@
 // =============================================================================
 typedef struct {
   // The reference buffer
-  const struct buf_2d *ref;
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	struct buf_2d *ref;
+#else
+	const struct buf_2d *ref;
+#endif
 
   // The source and predictors/mask used by translational search
   const struct buf_2d *src;
@@ -266,7 +270,12 @@
 #endif
 #endif
     const search_site_config search_sites[NUM_DISTINCT_SEARCH_METHODS],
-    int fine_search_interval);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	int fine_search_interval, int ref_idx);
+#else
+	int fine_search_interval);
+#endif
+
 
 // Sets up configs for fullpixel diamond search method.
 void av1_init_dsmotion_compensation(search_site_config *cfg, int stride);
@@ -354,11 +363,11 @@
 int av1_init_search_range(int size);
 
 int av1_refining_search_8p_c(const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
-                             const FULLPEL_MV start_mv, FULLPEL_MV *best_mv);
+	const FULLPEL_MV start_mv, FULLPEL_MV *best_mv);
 #if CONFIG_FLEX_MVRES
 int av1_refining_search_8p_c_low_precision(
-    const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV start_mv,
-    FULLPEL_MV *best_mv, int fast_mv_refinement);
+	const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV start_mv,
+	FULLPEL_MV *best_mv, int fast_mv_refinement);
 #endif
 
 int av1_full_pixel_search(const FULLPEL_MV start_mv,
@@ -398,6 +407,7 @@
 // =============================================================================
 //  Subpixel Motion Search
 // =============================================================================
+
 enum {
   EIGHTH_PEL,
   QUARTER_PEL,
@@ -432,6 +442,7 @@
   SUBPEL_SEARCH_VAR_PARAMS var_params;
 } SUBPEL_MOTION_SEARCH_PARAMS;
 
+
 #if CONFIG_JOINT_MVD
 // motion search for joint MVD coding
 int joint_mvd_search(const AV1_COMMON *const cm, MACROBLOCKD *xd,
@@ -477,7 +488,11 @@
 #if CONFIG_FLEX_MVRES
                                        const MvSubpelPrecision pb_mv_precision,
 #endif
-                                       const int *cost_list);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	const int *cost_list, int ref_idx);
+#else
+	const int *cost_list);
+#endif
 
 typedef int(fractional_mv_step_fp)(MACROBLOCKD *xd, const AV1_COMMON *const cm,
                                    const SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
diff --git a/av1/encoder/model_rd.h b/av1/encoder/model_rd.h
index 64c2342..730847d 100644
--- a/av1/encoder/model_rd.h
+++ b/av1/encoder/model_rd.h
@@ -234,6 +234,7 @@
                        &bw, &bh);
 
     sse = calculate_sse(xd, p, pd, bw, bh);
+
     model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate,
                           &dist);
 
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 891fa86..20e0b02 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -256,7 +256,11 @@
 #if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
-                                     src_search_sites, fine_search_interval);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	  src_search_sites, fine_search_interval, ref_idx);
+#else
+	  src_search_sites, fine_search_interval);
+#endif
 #else
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
                                      src_search_sites, fine_search_interval);
@@ -415,7 +419,11 @@
 #if CONFIG_FLEX_MVRES
                                       pb_mv_precision,
 #endif
-                                      cost_list);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		cost_list, ref_idx);
+#else
+		cost_list);
+#endif
     MV subpel_start_mv = get_mv_from_fullmv(&best_mv->as_fullmv);
     assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv));
 #if CONFIG_C071_SUBBLK_WARPMV
@@ -521,7 +529,7 @@
   int_mv curr_best_mv;
   const int ref = mbmi->ref_frame[ref_idx];
   const YV12_BUFFER_CONFIG *scaled_ref_frame =
-      av1_get_scaled_ref_frame(cpi, ref);
+	  av1_get_scaled_ref_frame(cpi, ref);
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
   const MvCosts *mv_costs = &x->mv_costs;
@@ -566,16 +574,20 @@
 #if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	  NULL, 0, ref_idx);
+#else
+	  NULL, 0);
+#endif
 
-                                     NULL, 0);
 
   if (pb_mv_precision < MV_PRECISION_ONE_PEL)
-    bestsme = av1_refining_search_8p_c_low_precision(
-        &full_ms_params, start_fullmv, &curr_best_mv.as_fullmv,
-        cpi->sf.flexmv_sf.fast_mv_refinement);
+	  bestsme = av1_refining_search_8p_c_low_precision(
+		  &full_ms_params, start_fullmv, &curr_best_mv.as_fullmv,
+		  cpi->sf.flexmv_sf.fast_mv_refinement);
   else
-    bestsme = av1_refining_search_8p_c(&full_ms_params, start_fullmv,
-                                       &curr_best_mv.as_fullmv);
+	  bestsme = av1_refining_search_8p_c(&full_ms_params, start_fullmv,
+		  &curr_best_mv.as_fullmv);
 
   if (scaled_ref_frame) {
     // Swap back the original buffers for subpel motion search.
@@ -656,7 +668,11 @@
     unsigned int sse;
     SUBPEL_MOTION_SEARCH_PARAMS ms_params;
     av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv,
-                                      pb_mv_precision, NULL);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		pb_mv_precision, NULL, ref_idx);
+#else
+		pb_mv_precision, NULL);
+#endif
     // ms_params.forced_stop = EIGHTH_PEL;
 
     MV start_mv1 = get_mv_from_fullmv(&curr_best_mv.as_fullmv);
@@ -726,7 +742,6 @@
     av1_get_scaled_ref_frame(cpi, refs[0]),
     av1_get_scaled_ref_frame(cpi, refs[1])
   };
-
   // Prediction buffer from second frame.
   DECLARE_ALIGNED(16, uint16_t, second_pred[MAX_SB_SQUARE]);
   int_mv best_mv;
@@ -762,6 +777,7 @@
         int i;
         for (i = 0; i < num_planes; i++)
           backup_yv12[ref][i] = xd->plane[i].pre[ref];
+
         av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                              NULL, num_planes, &mbmi->chroma_ref_info);
       }
@@ -778,20 +794,30 @@
     ref_yv12[0] = xd->plane[plane].pre[0];
     ref_yv12[1] = xd->plane[plane].pre[1];
 
-    InterPredParams inter_pred_params;
-    const InterpFilter interp_filters = EIGHTTAP_REGULAR;
+	InterPredParams inter_pred_params;
+	const InterpFilter interp_filters = EIGHTTAP_REGULAR;
+	av1_init_inter_params(&inter_pred_params, pw, ph, mi_row * MI_SIZE,
+		mi_col * MI_SIZE, 0, 0, xd->bd, 0, &cm->sf_identity,
+		&ref_yv12[!id], interp_filters);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+  av1_init_phase_offset(&inter_pred_params, cm);
+#endif
+  inter_pred_params.conv_params = get_conv_params(0, 0, xd->bd);
 
-    av1_init_inter_params(&inter_pred_params, pw, ph, mi_row * MI_SIZE,
-                          mi_col * MI_SIZE, 0, 0, xd->bd, 0, &cm->sf_identity,
-                          &ref_yv12[!id], interp_filters);
-    inter_pred_params.conv_params = get_conv_params(0, 0, xd->bd);
-
-    // Since we have scaled the reference frames to match the size of the
-    // current frame we must use a unit scaling factor during mode selection.
-    av1_enc_build_one_inter_predictor(second_pred, pw, &cur_mv[!id].as_mv,
-                                      &inter_pred_params);
-    // Do full-pixel compound motion search on the current reference frame.
-    if (id) xd->plane[plane].pre[0] = ref_yv12[id];
+  // Since we have scaled the reference frames to match the size of the
+  // current frame we must use a unit scaling factor during mode selection.
+  av1_enc_build_one_inter_predictor(second_pred, pw, &cur_mv[!id].as_mv,
+                                    &inter_pred_params);
+  // Do full-pixel compound motion search on the current
+  // reference frame.
+  if (id) xd->plane[plane].pre[0] = ref_yv12[id];
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	if (id) {
+		struct scale_factors* tmp_sf = xd->block_ref_scale_factors[0];
+		xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[id];
+		xd->block_ref_scale_factors[id] = tmp_sf;
+	}
+#endif
 
 #if CONFIG_FLEX_MVRES && CONFIG_IBC_BV_IMPROVEMENT
     const int is_ibc_cost = 0;
@@ -808,7 +834,11 @@
 #endif
 #endif
                                        NULL,
-                                       /*fine_search_interval=*/0);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		/*fine_search_interval=*/0, 0);
+#else
+		/*fine_search_interval=*/0);
+#endif
 
     av1_set_ms_compound_refs(&full_ms_params.ms_buffers, second_pred, mask,
                              mask_stride, id);
@@ -827,16 +857,21 @@
     );
 #endif  // CONFIG_C071_SUBBLK_WARPMV
 
+
     // Small-range full-pixel motion search.
 #if CONFIG_FLEX_MVRES
     if (pb_mv_precision < MV_PRECISION_ONE_PEL)
+        {
       bestsme = av1_refining_search_8p_c_low_precision(
           &full_ms_params, start_fullmv, &best_mv.as_fullmv,
           cpi->sf.flexmv_sf.fast_mv_refinement);
+        }
     else
 #endif
+    {
       bestsme = av1_refining_search_8p_c(&full_ms_params, start_fullmv,
-                                         &best_mv.as_fullmv);
+                                                   &best_mv.as_fullmv);
+  }
 
     // Restore the pointer to the first (possibly scaled) prediction buffer.
     if (id) xd->plane[plane].pre[0] = ref_yv12[0];
@@ -880,7 +915,11 @@
 #if CONFIG_FLEX_MVRES
                                         pb_mv_precision,
 #endif
-                                        NULL);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		  NULL, 0);
+#else
+		  NULL);
+#endif
       av1_set_ms_compound_refs(&ms_params.var_params.ms_buffers, second_pred,
                                mask, mask_stride, id);
       ms_params.forced_stop = EIGHTH_PEL;
@@ -904,6 +943,13 @@
 
     // Restore the pointer to the first prediction buffer.
     if (id) xd->plane[plane].pre[0] = ref_yv12[0];
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	if (id) {
+		struct scale_factors* tmp_sf = xd->block_ref_scale_factors[0];
+		xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[id];
+		xd->block_ref_scale_factors[id] = tmp_sf;
+	}
+#endif
     if (bestsme < last_besterr[id]) {
       cur_mv[id] = best_mv;
       last_besterr[id] = bestsme;
@@ -954,6 +1000,7 @@
 #endif
 
   struct buf_2d backup_yv12[MAX_MB_PLANE];
+
   const YV12_BUFFER_CONFIG *const scaled_ref_frame =
       av1_get_scaled_ref_frame(cpi, ref);
 
@@ -963,6 +1010,13 @@
     orig_yv12 = pd->pre[0];
     pd->pre[0] = pd->pre[ref_idx];
   }
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  if (ref_idx) {
+	  struct scale_factors* tmp_sf = xd->block_ref_scale_factors[0];
+	  xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[ref_idx];
+	  xd->block_ref_scale_factors[ref_idx] = tmp_sf;
+  }
+#endif
 
   if (scaled_ref_frame) {
     // Swap out the reference frame for a version that's been scaled to
@@ -977,7 +1031,6 @@
     av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
                          num_planes, &mbmi->chroma_ref_info);
   }
-
   int bestsme = INT_MAX;
   int_mv best_mv;
 
@@ -988,7 +1041,11 @@
 #if CONFIG_FLEX_MVRES
                                     mbmi->pb_mv_precision,
 #endif
-                                    NULL);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	  NULL, 0);
+#else
+	  NULL);
+#endif
   ms_params.forced_stop = EIGHTH_PEL;
   bestsme = adaptive_mvd_search(cm, xd, &ms_params, ref_mv.as_mv,
                                 &best_mv.as_mv, &dis, &sse);
@@ -1000,16 +1057,23 @@
     }
   }
   // Restore the pointer to the first unscaled prediction buffer.
+#if CONFIG_2D_SR_RESTORE_UNSCALED_BUF
+  if (ref_idx) { 
+    pd->pre[ref_idx] = pd->pre[0];
+    pd->pre[0] = orig_yv12;
+  }
+#else
   if (ref_idx) pd->pre[0] = orig_yv12;
-
+#endif
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  if (ref_idx) {
+	  struct scale_factors* tmp_sf = xd->block_ref_scale_factors[0];
+	  xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[ref_idx];
+	  xd->block_ref_scale_factors[ref_idx] = tmp_sf;
+  }
+#endif
   if (bestsme < INT_MAX) {
     *this_mv = best_mv.as_mv;
-    const MV diff = { best_mv.as_mv.row - ref_mv.as_mv.row,
-                      best_mv.as_mv.col - ref_mv.as_mv.col };
-    if (diff.row != 0 && diff.col != 0) {
-      printf("assertion failure error!\n");
-    }
-    assert(diff.row == 0 || diff.col == 0);
   }
 
   *rate_mv = 0;
@@ -1044,7 +1108,11 @@
   const int num_planes = av1_num_planes(cm);
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = xd->mi[0];
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  const MV_REFERENCE_FRAME refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
+#else
   const int ref = mbmi->ref_frame[ref_idx];
+#endif
   const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
   struct macroblockd_plane *const pd = &xd->plane[0];
   const MvCosts *mv_costs = &x->mv_costs;
@@ -1056,50 +1124,113 @@
 #endif
 
 #if CONFIG_JOINT_MVD
+#if !CONFIG_2D_SR_SECOND_PRED_FIX
   InterPredParams inter_pred_params;
   if (is_joint_mvd_coding_mode(mbmi->mode)) {
-    const int pw = block_size_wide[bsize];
-    const int ph = block_size_high[bsize];
-    const int mi_row = xd->mi_row;
-    const int mi_col = xd->mi_col;
-    const int_mv ref_other_mv = av1_get_ref_mv(x, 1 - ref_idx);
-    other_mv->row = ref_other_mv.as_mv.row;
-    other_mv->col = ref_other_mv.as_mv.col;
-    struct buf_2d ref_yv12 = xd->plane[0].pre[!ref_idx];
-    av1_init_inter_params(&inter_pred_params, pw, ph, mi_row * MI_SIZE,
-                          mi_col * MI_SIZE, 0, 0, xd->bd, 0, &cm->sf_identity,
-                          &ref_yv12, mbmi->interp_fltr);
-    inter_pred_params.conv_params = get_conv_params(0, PLANE_TYPE_Y, xd->bd);
+	  const int pw = block_size_wide[bsize];
+	  const int ph = block_size_high[bsize];
+	  const int mi_row = xd->mi_row;
+	  const int mi_col = xd->mi_col;
+	  const int_mv ref_other_mv = av1_get_ref_mv(x, 1 - ref_idx);
+	  other_mv->row = ref_other_mv.as_mv.row;
+	  other_mv->col = ref_other_mv.as_mv.col;
+	  struct buf_2d ref_yv12 = xd->plane[0].pre[!ref_idx];
+	  av1_init_inter_params(&inter_pred_params, pw, ph, mi_row * MI_SIZE,
+		  mi_col * MI_SIZE, 0, 0, xd->bd, 0, &cm->sf_identity,
+		  &ref_yv12, mbmi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
+	  inter_pred_params.conv_params = get_conv_params(0, PLANE_TYPE_Y, xd->bd);
   }
+#endif
 #endif  // CONFIG_JOINT_MVD
 
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
+  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
+    av1_get_scaled_ref_frame(cpi, refs[ref_idx]),
+    av1_get_scaled_ref_frame(cpi, refs[1 - ref_idx])
+  };
+#else  // CONFIG_2D_SR_SECOND_PRED_FIX
   struct buf_2d backup_yv12[MAX_MB_PLANE];
   const YV12_BUFFER_CONFIG *const scaled_ref_frame =
       av1_get_scaled_ref_frame(cpi, ref);
+#endif  // CONFIG_2D_SR_SECOND_PRED_FIX
 
   // Check that this is either an interinter or an interintra block
   assert(has_second_ref(mbmi) || (ref_idx == 0 && is_interintra_mode(mbmi)));
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  // Store the first prediction buffer.
+  if (ref_idx) {
+	  struct buf_2d orig_yv12 = pd->pre[0];
+	  pd->pre[0] = pd->pre[ref_idx];
+	  pd->pre[ref_idx] = orig_yv12;
 
+	  const struct scale_factors* tmp_sf = xd->block_ref_scale_factors[0];
+	  xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[ref_idx];
+	  xd->block_ref_scale_factors[ref_idx] = tmp_sf;
+  }
+#else
   // Store the first prediction buffer.
   struct buf_2d orig_yv12;
   if (ref_idx) {
     orig_yv12 = pd->pre[0];
     pd->pre[0] = pd->pre[ref_idx];
   }
-
-  if (scaled_ref_frame) {
-    // Swap out the reference frame for a version that's been scaled to
-    // match the resolution of the current frame, allowing the existing
-    // full-pixel motion search code to be used without additional
-    // modifications.
-    for (int i = 0; i < num_planes; i++) {
-      backup_yv12[i] = xd->plane[i].pre[0];
-    }
-    const int mi_row = xd->mi_row;
-    const int mi_col = xd->mi_col;
-    av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
-                         num_planes, &mbmi->chroma_ref_info);
+#endif
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  for (int idx = 0; idx < 2; idx++) {
+	  if (scaled_ref_frame[idx]) {
+		  // Swap out the reference frame for a version that's been scaled to
+		  // match the resolution of the current frame, allowing the existing
+		  // full-pixel motion search code to be used without additional
+		  // modifications.
+		  for (int i = 0; i < num_planes; i++) {
+			  backup_yv12[idx][i] = xd->plane[i].pre[idx];
+		  }
+		  const int mi_row = xd->mi_row;
+		  const int mi_col = xd->mi_col;
+		  av1_setup_pre_planes(xd, idx, scaled_ref_frame[idx], mi_row, mi_col, NULL,
+			  num_planes, &mbmi->chroma_ref_info);
+	  }
   }
+#else
+  if (scaled_ref_frame) {
+	  // Swap out the reference frame for a version that's been scaled to
+	  // match the resolution of the current frame, allowing the existing
+	  // full-pixel motion search code to be used without additional
+	  // modifications.
+	  for (int i = 0; i < num_planes; i++) {
+		  backup_yv12[i] = xd->plane[i].pre[0];
+	  }
+	  const int mi_row = xd->mi_row;
+	  const int mi_col = xd->mi_col;
+	  av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
+		  num_planes, &mbmi->chroma_ref_info);
+  }
+#endif
+
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  InterPredParams inter_pred_params;
+  if (is_joint_mvd_coding_mode(mbmi->mode)) {
+	  const int pw = block_size_wide[bsize];
+	  const int ph = block_size_high[bsize];
+	  const int mi_row = xd->mi_row;
+	  const int mi_col = xd->mi_col;
+	  const int_mv ref_other_mv = av1_get_ref_mv(x, 1 - ref_idx);
+	  other_mv->row = ref_other_mv.as_mv.row;
+	  other_mv->col = ref_other_mv.as_mv.col;
+	  struct buf_2d ref_yv12 = xd->plane[0].pre[1];
+	  av1_init_inter_params(&inter_pred_params, pw, ph, mi_row * MI_SIZE,
+		  mi_col * MI_SIZE, 0, 0, xd->bd, 0, &cm->sf_identity,
+		  &ref_yv12, mbmi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
+	  inter_pred_params.conv_params = get_conv_params(0, PLANE_TYPE_Y, xd->bd);
+  }
+#endif
 
   int bestsme = INT_MAX;
   int_mv best_mv;
@@ -1116,41 +1247,68 @@
 #endif
   if (is_adaptive_mvd
 #if IMPROVED_AMVD && CONFIG_JOINT_MVD
-      && !is_joint_amvd_coding_mode(mbmi->mode)
+	  && !is_joint_amvd_coding_mode(mbmi->mode)
 #endif  // IMPROVED_AMVD && CONFIG_JOINT_MVD
-  ) {
-    int dis; /* TODO: use dis in distortion calculation later. */
-    unsigned int sse;
-    SUBPEL_MOTION_SEARCH_PARAMS ms_params;
-    av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv.as_mv,
+	  ) {
+	  int dis; /* TODO: use dis in distortion calculation later. */
+	  unsigned int sse;
+	  SUBPEL_MOTION_SEARCH_PARAMS ms_params;
+	  av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv.as_mv,
 #if CONFIG_FLEX_MVRES
-                                      pb_mv_precision,
+		  pb_mv_precision,
 #endif
-                                      NULL);
-    av1_set_ms_compound_refs(&ms_params.var_params.ms_buffers, second_pred,
-                             mask, mask_stride, ref_idx);
-    ms_params.forced_stop = EIGHTH_PEL;
-    bestsme = adaptive_mvd_search(cm, xd, &ms_params, ref_mv.as_mv,
-                                  &best_mv.as_mv, &dis, &sse);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		  NULL, 0);
+#else
+		  NULL);
+#endif
+	  av1_set_ms_compound_refs(&ms_params.var_params.ms_buffers, second_pred,
+		  mask, mask_stride, ref_idx);
+	  ms_params.forced_stop = EIGHTH_PEL;
+	  bestsme = adaptive_mvd_search(cm, xd, &ms_params, ref_mv.as_mv,
+		  &best_mv.as_mv, &dis, &sse);
+#if CONFIG_2D_SR_RESTORE_SCALED_BUF
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	for (int idx = 0; idx < 2; idx++) {
+		  if (scaled_ref_frame[idx]) {
+			  // Swap back the original buffers for subpel motion search
+			  for (int i = 0; i < num_planes; i++) {
+				  xd->plane[i].pre[idx] = backup_yv12[idx][i];
+			  }
+		  }
+	}
+#else
+	if (scaled_ref_frame) {
+		// Swap back the original buffers for subpel motion search
+		for (int i = 0; i < num_planes; i++) {
+			xd->plane[i].pre[0] = backup_yv12[i];
+		}
+	}
+#endif
+#endif
   } else
 #endif  // CONFIG_ADAPTIVE_MVD
 #if CONFIG_JOINT_MVD
-      if (mbmi->mode == JOINT_NEWMV
+	  if (mbmi->mode == JOINT_NEWMV
 #if CONFIG_OPTFLOW_REFINEMENT
-          || mbmi->mode == JOINT_NEWMV_OPTFLOW
+		  || mbmi->mode == JOINT_NEWMV_OPTFLOW
 #endif
-      ) {
-    int dis; /* TODO: use dis in distortion calculation later. */
-    unsigned int sse;
-    SUBPEL_MOTION_SEARCH_PARAMS ms_params;
-    av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv.as_mv,
+		  ) {
+		  int dis; /* TODO: use dis in distortion calculation later. */
+		  unsigned int sse;
+		  SUBPEL_MOTION_SEARCH_PARAMS ms_params;
+		  av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv.as_mv,
 #if CONFIG_FLEX_MVRES
-                                      pb_mv_precision,
+			  pb_mv_precision,
 #endif
-                                      NULL);
-    av1_set_ms_compound_refs(&ms_params.var_params.ms_buffers, second_pred,
-                             mask, mask_stride, ref_idx);
-    ms_params.forced_stop = EIGHTH_PEL;
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+			  NULL, 0);
+#else
+			  NULL);
+#endif
+		  av1_set_ms_compound_refs(&ms_params.var_params.ms_buffers, second_pred,
+			  mask, mask_stride, ref_idx);
+		  ms_params.forced_stop = EIGHTH_PEL;
 #if CONFIG_FLEX_MVRES
     lower_mv_precision(this_mv, pb_mv_precision);
     if (pb_mv_precision < MV_PRECISION_ONE_PEL) {
@@ -1165,7 +1323,26 @@
                                  &best_other_mv.as_mv, second_pred,
                                  &inter_pred_params, NULL);
 #if CONFIG_FLEX_MVRES
-    }
+		  }
+#endif
+#if CONFIG_2D_SR_RESTORE_SCALED_BUF
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	for (int idx = 0; idx < 2; idx++) {
+		  if (scaled_ref_frame[idx]) {
+			  // Swap back the original buffers for subpel motion search
+			  for (int i = 0; i < num_planes; i++) {
+				  xd->plane[i].pre[idx] = backup_yv12[idx][i];
+			  }
+		  }
+	}
+#else
+	if (scaled_ref_frame) {
+		// Swap back the original buffers for subpel motion search
+		for (int i = 0; i < num_planes; i++) {
+			xd->plane[i].pre[0] = backup_yv12[i];
+		}
+	}
+#endif
 #endif
   } else
 #endif  // CONFIG_JOINT_MVD
@@ -1183,7 +1360,11 @@
 #if CONFIG_FLEX_MVRES
                                       pb_mv_precision,
 #endif
-                                      NULL);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		NULL, 0);
+#else
+		NULL);
+#endif
     av1_set_ms_compound_refs(&ms_params.var_params.ms_buffers, second_pred,
                              mask, mask_stride, ref_idx);
     ms_params.forced_stop = EIGHTH_PEL;
@@ -1196,6 +1377,25 @@
                                            &best_mv.as_mv, &dis, &sse, ref_idx,
                                            other_mv, &best_other_mv.as_mv,
                                            second_pred, &inter_pred_params);
+#if CONFIG_2D_SR_RESTORE_SCALED_BUF
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	for (int idx = 0; idx < 2; idx++) {
+		  if (scaled_ref_frame[idx]) {
+			  // Swap back the original buffers for subpel motion search
+			  for (int i = 0; i < num_planes; i++) {
+				  xd->plane[i].pre[idx] = backup_yv12[idx][i];
+			  }
+		  }
+	}
+#else
+	if (scaled_ref_frame) {
+		// Swap back the original buffers for subpel motion search
+		for (int i = 0; i < num_planes; i++) {
+			xd->plane[i].pre[0] = backup_yv12[i];
+		}
+	}
+#endif
+#endif
   } else
 #endif  // IMPROVED_AMVD && CONFIG_JOINT_MVD
 #if CONFIG_ADAPTIVE_MVD || CONFIG_JOINT_MVD
@@ -1203,8 +1403,7 @@
 #endif  // CONFIG_ADAPTIVE_MVD || CONFIG_JOINT_MVD
     // Make motion search params
     FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
-
-    av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
+	av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
                                        &ref_mv.as_mv,
 #if CONFIG_FLEX_MVRES
                                        pb_mv_precision,
@@ -1213,7 +1412,11 @@
 #endif
 #endif
                                        NULL,
-                                       /*fine_search_interval=*/0);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		/*fine_search_interval=*/0, 0);
+#else
+		/*fine_search_interval=*/0);
+#endif
 
     av1_set_ms_compound_refs(&full_ms_params.ms_buffers, second_pred, mask,
                              mask_stride, ref_idx);
@@ -1239,24 +1442,34 @@
     // Small-range full-pixel motion search.
 #if CONFIG_FLEX_MVRES
     if (pb_mv_precision < MV_PRECISION_ONE_PEL) {
-      bestsme = av1_refining_search_8p_c_low_precision(
-          &full_ms_params, start_fullmv, &best_mv.as_fullmv,
-          cpi->sf.flexmv_sf.fast_mv_refinement);
+		bestsme = av1_refining_search_8p_c_low_precision(
+			&full_ms_params, start_fullmv, &best_mv.as_fullmv,
+			cpi->sf.flexmv_sf.fast_mv_refinement);
     } else {
 #endif
       // Small-range full-pixel motion search.
-      bestsme = av1_refining_search_8p_c(&full_ms_params, start_fullmv,
+		bestsme = av1_refining_search_8p_c(&full_ms_params, start_fullmv,
                                          &best_mv.as_fullmv);
 #if CONFIG_FLEX_MVRES
     }
 #endif
-
-    if (scaled_ref_frame) {
-      // Swap back the original buffers for subpel motion search.
-      for (int i = 0; i < num_planes; i++) {
-        xd->plane[i].pre[0] = backup_yv12[i];
-      }
-    }
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	for (int idx = 0; idx < 2; idx++) {
+		  if (scaled_ref_frame[idx]) {
+			  // Swap back the original buffers for subpel motion search
+			  for (int i = 0; i < num_planes; i++) {
+				  xd->plane[i].pre[idx] = backup_yv12[idx][i];
+			  }
+		  }
+	}
+#else
+	if (scaled_ref_frame) {
+		// Swap back the original buffers for subpel motion search.
+		for (int i = 0; i < num_planes; i++) {
+			xd->plane[i].pre[0] = backup_yv12[i];
+		}
+	}
+#endif
 
     if (cpi->common.features.cur_frame_force_integer_mv) {
       convert_fullmv_to_mv(&best_mv);
@@ -1279,13 +1492,17 @@
     if (use_fractional_mv) {
       int dis; /* TODO: use dis in distortion calculation later. */
       unsigned int sse;
-      SUBPEL_MOTION_SEARCH_PARAMS ms_params;
+	  SUBPEL_MOTION_SEARCH_PARAMS ms_params;
       av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                         &ref_mv.as_mv,
 #if CONFIG_FLEX_MVRES
                                         pb_mv_precision,
 #endif
-                                        NULL);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		  NULL, 0);
+#else
+		  NULL);
+#endif
       av1_set_ms_compound_refs(&ms_params.var_params.ms_buffers, second_pred,
                                mask, mask_stride, ref_idx);
       ms_params.forced_stop = EIGHTH_PEL;
@@ -1311,8 +1528,27 @@
 #endif  // CONFIG_ADAPTIVE_MVD || CONFIG_JOINT_MVD
 
   // Restore the pointer to the first unscaled prediction buffer.
-  if (ref_idx) pd->pre[0] = orig_yv12;
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	  // Store the first prediction buffer.
+  if (ref_idx) {
+	  struct buf_2d orig_yv12 = pd->pre[0];
+	  pd->pre[0] = pd->pre[ref_idx];
+	  pd->pre[ref_idx] = orig_yv12;
 
+	  const struct scale_factors* tmp_sf = xd->block_ref_scale_factors[0];
+	  xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[ref_idx];
+	  xd->block_ref_scale_factors[ref_idx] = tmp_sf;
+	  }
+#else
+#if CONFIG_2D_SR_RESTORE_UNSCALED_BUF
+  if (ref_idx) { 
+    pd->pre[ref_idx] = pd->pre[0];
+    pd->pre[0] = orig_yv12;
+  }
+#else
+  if (ref_idx) pd->pre[0] = orig_yv12;
+#endif
+#endif
   if (bestsme < INT_MAX) *this_mv = best_mv.as_mv;
 
 #if CONFIG_JOINT_MVD
@@ -1386,6 +1622,9 @@
   av1_init_inter_params(&inter_pred_params, pw, ph, p_row, p_col,
                         pd->subsampling_x, pd->subsampling_y, xd->bd, 0, &sf,
                         &ref_yv12, mbmi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+  av1_init_phase_offset(&inter_pred_params, cm);
+#endif
   inter_pred_params.conv_params = get_conv_params(0, plane, xd->bd);
 
   // Get the prediction block from the 'other' reference frame.
@@ -1580,7 +1819,11 @@
                                      is_ibc_cost,
 #endif
 #endif
-                                     src_search_sites, fine_search_interval);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	  src_search_sites, fine_search_interval, ref_idx);
+#else
+	  src_search_sites, fine_search_interval);
+#endif
 #if CONFIG_FLEX_MVRES
   full_pel_lower_mv_precision(&start_mv, pb_mv_precision);
 #endif
@@ -1603,7 +1846,11 @@
 #if CONFIG_FLEX_MVRES
                                       pb_mv_precision,
 #endif
-                                      cost_list);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		cost_list, ref_idx);
+#else
+		cost_list);
+#endif
     // TODO(yunqing): integrate this into av1_make_default_subpel_ms_params().
     ms_params.forced_stop = cpi->sf.mv_sf.simple_motion_subpel_force_stop;
 
@@ -1727,7 +1974,11 @@
                                      is_ibc_cost,
 #endif
 #endif
-                                     src_search_sites, fine_search_interval);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+	  src_search_sites, fine_search_interval, ref_idx);
+#else
+	  src_search_sites, fine_search_interval);
+#endif
 #if CONFIG_FLEX_MVRES
   full_pel_lower_mv_precision(&start_mv, pb_mv_precision);
 #endif
@@ -1751,7 +2002,11 @@
 #if CONFIG_FLEX_MVRES
                                       pb_mv_precision,
 #endif
-                                      cost_list);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+		cost_list, ref_idx);
+#else
+		cost_list);
+#endif
     // TODO(yunqing): integrate this into av1_make_default_subpel_ms_params().
     ms_params.forced_stop = cpi->sf.mv_sf.simple_motion_subpel_force_stop;
 
diff --git a/av1/encoder/mv_prec.c b/av1/encoder/mv_prec.c
index 3746342..9e00614 100644
--- a/av1/encoder/mv_prec.c
+++ b/av1/encoder/mv_prec.c
@@ -293,8 +293,10 @@
   int use_fractional_mv = !cpi->common.features.cur_frame_force_integer_mv;
 
 #if CONFIG_ADAPTIVE_MVD
+#if !CONFIG_ADAPTIVE_MVD_TEST1
   if (is_adaptive_mvd && (mv_class != MV_CLASS_0 || int_part > 0))
     use_fractional_mv = 0;
+#endif
 #endif  // CONFIG_ADAPTIVE_MVD
 #if CONFIG_FLEX_MVRES
   int frac_part_rate = 0, frac_part_rate_qpel = 0;
diff --git a/av1/encoder/pickccso.c b/av1/encoder/pickccso.c
index 1dd7be9..15b2199 100644
--- a/av1/encoder/pickccso.c
+++ b/av1/encoder/pickccso.c
@@ -583,6 +583,7 @@
       ((mi_params->mi_cols >> xd->plane[plane].subsampling_x) +
        (1 << log2_filter_unit_size >> 2) - 1) /
       (1 << log2_filter_unit_size >> 2);
+
   const int sb_count = ccso_nvfb * ccso_nhfb;
   const int pic_height_c = xd->plane[plane].dst.height;
   const int pic_width_c = xd->plane[plane].dst.width;
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index 452ab8d..a7d01de 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -29,6 +29,7 @@
 #include "av1/encoder/picklpf.h"
 
 #include <float.h>
+
 #define CHROMA_LAMBDA_MULT 6
 
 static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc,
@@ -290,21 +291,20 @@
         vert_bits + (offsets[off_ind] == vert_offset ? 0 : DF_PAR_BITS);
     best_bits = vert_bits + (offset_best == vert_offset ? 0 : DF_PAR_BITS);
   }
-
   double best_cost =
       RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult * chroma_lambda_mult, best_bits,
                                      best_err, cm->seq_params.bit_depth);
   double start_cost =
-      RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult * chroma_lambda_mult, start_bits,
-                                     start_err, cm->seq_params.bit_depth);
+	  RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult * chroma_lambda_mult, start_bits,
+		  start_err, cm->seq_params.bit_depth);
 
 #else
   double best_cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
       x->rdmult * chroma_lambda_mult, offset_best ? DF_PAR_BITS : 0, best_err,
       cm->seq_params.bit_depth);
   double start_cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult * chroma_lambda_mult, offsets[off_ind] ? DF_PAR_BITS : 0,
-      start_err, cm->seq_params.bit_depth);
+	  x->rdmult * chroma_lambda_mult, offsets[off_ind] ? DF_PAR_BITS : 0,
+	  start_err, cm->seq_params.bit_depth);
 #endif  // DF_DUAL
 
   if (best_cost_ret) *best_cost_ret = AOMMIN(best_cost, start_cost);
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index eef3d93..18afbca 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -185,6 +185,9 @@
   bool is_cross_filter_round;
 #endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   AV1PixelRect tile_rect;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  bool skip_acc_txskip_flag;
+#endif
 } RestSearchCtxt;
 
 #if CONFIG_WIENER_NONSEP
@@ -299,7 +302,11 @@
 static int64_t try_restoration_unit(const RestSearchCtxt *rsc,
                                     const RestorationTileLimits *limits,
                                     const AV1PixelRect *tile_rect,
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+                                    RestorationUnitInfo *rui) {
+#else
                                     const RestorationUnitInfo *rui) {
+#endif
   const AV1_COMMON *const cm = rsc->cm;
   const int plane = rsc->plane;
   const int is_uv = plane > 0;
@@ -311,6 +318,9 @@
   // TODO(yunqing): For now, only use optimized LR filter in decoder. Can be
   // also used in encoder.
   const int optimized_lr = 0;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  rui->skip_acc_txskip_flag = rsc->skip_acc_txskip_flag;
+#endif
 #if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   if (rsc->is_cross_filter_round) {
     // copy the pre-filtered data to dst buffer, this implementation could be
@@ -915,9 +925,13 @@
 
   rusi->sse[RESTORE_SGRPROJ] =
       try_restoration_unit(rsc, limits, &rsc->tile_rect, &rui);
-
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth, av1_superres_scaled(cm) ? (cm->superres_scale_denominator << 1)/ SCALE_NUMERATOR : 2);
+#else
   double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth);
+	  x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth);
+#endif
 
 #if CONFIG_LR_MERGE_COEFFS
   Vector *current_unit_stack = rsc->unit_stack;
@@ -928,14 +942,23 @@
   const int bank_ref_base = rusi->sgrproj_info.bank_ref;
   // Only test the reference in rusi->sgrproj_info.bank_ref, generated from
   // the count call above.
-
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_nomerge_base = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth, av1_superres_scaled(cm) ? (cm->superres_scale_denominator << 1)/ SCALE_NUMERATOR : 2);
+#else
   double cost_nomerge_base = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth);
+	  x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth);
+#endif
   const int bits_min = x->mode_costs.sgrproj_restore_cost[1] +
                        x->mode_costs.merged_param_cost[1] +
                        (1 << AV1_PROB_COST_SHIFT);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  const double cost_min = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_min >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth, av1_superres_scaled(cm) ? (cm->superres_scale_denominator << 1)/ SCALE_NUMERATOR : 2);
+#else
   const double cost_min = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_min >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth);
+	  x->rdmult, bits_min >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth);
+#endif
   const double cost_nomerge_thr = (cost_nomerge_base + 3 * cost_min) / 4;
   RestorationType rtype =
       (cost_none <= cost_nomerge_thr) ? RESTORE_NONE : RESTORE_SGRPROJ;
@@ -1062,9 +1085,15 @@
       if (old_rusi->best_rtype[RESTORE_SGRPROJ - 1] == RESTORE_SGRPROJ &&
           !check_sgrproj_eq(&old_rusi->sgrproj_info, ref_sgrproj_info_cand))
         continue;
-      cost_nomerge_cand +=
-          RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult, old_unit->current_bits >> 4,
-                                         old_unit->current_sse, bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+	  cost_nomerge_cand +=
+		  RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(x->rdmult, old_unit->current_bits >> 4,
+			  old_unit->current_sse, bit_depth, av1_superres_scaled(cm) ? (cm->superres_scale_denominator << 1)/ SCALE_NUMERATOR : 2);
+#else
+	  cost_nomerge_cand +=
+		  RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult, old_unit->current_bits >> 4,
+			  old_unit->current_sse, bit_depth);
+#endif
     }
 
     // Iterate through vector to get sse and bits for each on the new filter.
@@ -1101,9 +1130,15 @@
         old_unit->merge_bits_cand =
             x->mode_costs.sgrproj_restore_cost[1] + merge_bits;
       }
-      cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST(
-          x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
-          bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+	  cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+		  x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
+		  bit_depth, av1_superres_scaled(cm) ? (cm->superres_scale_denominator << 1)/ SCALE_NUMERATOR : 2);
+#else
+	  cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST(
+		  x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
+		  bit_depth);
+#endif
     }
     if (cost_merge_cand - cost_nomerge_cand < cost_merge - cost_nomerge) {
       begin_idx = begin_idx_cand;
@@ -1192,8 +1227,13 @@
       x->mode_costs.sgrproj_restore_cost[1] +
       count_sgrproj_bits(&x->mode_costs, &rusi->sgrproj_info,
                          &rsc->sgrproj_bank);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_sgr = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth, av1_superres_scaled(cm) ? (cm->superres_scale_denominator << 1)/ SCALE_NUMERATOR : 2);
+#else
   double cost_sgr = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth);
+	  x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ], bit_depth);
+#endif
   if (rusi->sgrproj_info.ep < 10)
     cost_sgr *=
         (1 + DUAL_SGR_PENALTY_MULT * rsc->lpf_sf->dual_sgr_penalty_level);
@@ -1277,17 +1317,32 @@
   initialize_rui_for_nonsep_search(rsc, &rui);
 
   rui.restoration_type = RESTORE_PC_WIENER;
+
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    rsc->skip_acc_txskip_flag = (rsc->cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0;
+    rui.skip_acc_txskip_flag = rsc->skip_acc_txskip_flag;
+#endif  
+
   rusi->sse[RESTORE_PC_WIENER] =
       try_restoration_unit(rsc, limits, &rsc->tile_rect, &rui);
-
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth);
+	  x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth);
+#endif
 
   const int64_t bits_pc_wiener =
       x->mode_costs.pc_wiener_restore_cost[1] +
       (count_pc_wiener_bits() << AV1_PROB_COST_SHIFT);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_pc_wiener = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_pc_wiener >> 4, rusi->sse[RESTORE_PC_WIENER], bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost_pc_wiener = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_pc_wiener >> 4, rusi->sse[RESTORE_PC_WIENER], bit_depth);
+	  x->rdmult, bits_pc_wiener >> 4, rusi->sse[RESTORE_PC_WIENER], bit_depth);
+#endif
 
   RestorationType rtype =
       (cost_pc_wiener < cost_none) ? RESTORE_PC_WIENER : RESTORE_NONE;
@@ -1710,7 +1765,11 @@
 
 // If limits != NULL, calculates error for current restoration unit.
 // Otherwise, calculates error for all units in the stack using stored limits.
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+static int64_t calc_finer_tile_search_error(RestSearchCtxt *rsc,
+#else
 static int64_t calc_finer_tile_search_error(const RestSearchCtxt *rsc,
+#endif
                                             const RestorationTileLimits *limits,
                                             const AV1PixelRect *tile,
                                             RestorationUnitInfo *rui) {
@@ -1718,6 +1777,10 @@
 #if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   if (rsc->is_cross_filter_round) rui->wienerns_cross_info = rui->wienerns_info;
 #endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  rsc->skip_acc_txskip_flag = (rsc->cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0;
+  rui->skip_acc_txskip_flag = rsc->skip_acc_txskip_flag;
+#endif 
 #if CONFIG_LR_MERGE_COEFFS
   if (limits != NULL) {
     err = try_restoration_unit(rsc, limits, tile, rui);
@@ -1744,11 +1807,20 @@
 
 #if CONFIG_WIENER_NONSEP && CONFIG_LR_MERGE_COEFFS
 // This function resets the dst buffers using the correct filters.
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+static int64_t reset_unit_stack_dst_buffers(RestSearchCtxt *rsc,
+#else
 static int64_t reset_unit_stack_dst_buffers(const RestSearchCtxt *rsc,
+#endif
                                             const RestorationTileLimits *limits,
                                             const AV1PixelRect *tile,
                                             RestorationUnitInfo *rui) {
   int64_t err = 0;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    rsc->skip_acc_txskip_flag = (rsc->cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0;
+    rui->skip_acc_txskip_flag = rsc->skip_acc_txskip_flag;
+#endif  
+
   if (limits != NULL) {
     err = try_restoration_unit(rsc, limits, tile, rui);
   } else {
@@ -1824,8 +1896,13 @@
 #else
   int64_t bits = 0;
 #endif  // RD_WIENER_REFINEMENT_SEARCH
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(x->rdmult, bits >> 4, err,
+	  rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult, bits >> 4, err,
-                                               rsc->cm->seq_params.bit_depth);
+	  rsc->cm->seq_params.bit_depth);
+#endif
   int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
                     WIENER_FILT_TAP2_MINV };
   int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV,
@@ -1853,8 +1930,13 @@
 #else
           int64_t bits2 = 0;
 #endif  // RD_WIENER_REFINEMENT_SEARCH
-          double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-              x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		  double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			  x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		  double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			  x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth);
+#endif
           if (cost2 > cost) {
             plane_wiener->hfilter[p] += s;
             plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
@@ -1887,8 +1969,13 @@
 #else
           int64_t bits2 = 0;
 #endif  // RD_WIENER_REFINEMENT_SEARCH
-          double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-              x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		  double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			  x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		  double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			  x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth);
+#endif
           if (cost2 > cost) {
             plane_wiener->hfilter[p] -= s;
             plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
@@ -1922,8 +2009,13 @@
 #else
           int64_t bits2 = 0;
 #endif  // RD_WIENER_REFINEMENT_SEARCH
-          double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-              x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		  double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			  x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		  double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			  x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth);
+#endif
           if (cost2 > cost) {
             plane_wiener->vfilter[p] += s;
             plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
@@ -1956,8 +2048,13 @@
 #else
           int64_t bits2 = 0;
 #endif  // RD_WIENER_REFINEMENT_SEARCH
-          double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-              x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		  double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			  x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		  double cost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			  x->rdmult, bits2 >> 4, err2, rsc->cm->seq_params.bit_depth);
+#endif
           if (cost2 > cost) {
             plane_wiener->vfilter[p] -= s;
             plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
@@ -2055,6 +2152,10 @@
   rui.restoration_type = RESTORE_WIENER;
   finalize_sym_filter(reduced_wiener_win, vfilter, rui.wiener_info.vfilter);
   finalize_sym_filter(reduced_wiener_win, hfilter, rui.wiener_info.hfilter);
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  rsc->skip_acc_txskip_flag = (rsc->cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0;
+  rui.skip_acc_txskip_flag = rsc->skip_acc_txskip_flag;
+#endif  
 
   // Filter score computes the value of the function x'*A*x - x'*b for the
   // learned filter and compares it against identity filer. If there is no
@@ -2082,10 +2183,15 @@
     assert(rui.wiener_info.hfilter[0] == 0 &&
            rui.wiener_info.hfilter[WIENER_WIN - 1] == 0);
   }
-
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE],
+	  rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE],
-      rsc->cm->seq_params.bit_depth);
+	  x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE],
+	  rsc->cm->seq_params.bit_depth);
+#endif
 
 #if CONFIG_LR_MERGE_COEFFS
   Vector *current_unit_stack = rsc->unit_stack;
@@ -2096,16 +2202,27 @@
   const int bank_ref_base = rusi->wiener_info.bank_ref;
   // Only test the reference in rusi->wiener_info.bank_ref, generated from
   // the count call above.
-
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_nomerge_base = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_WIENER],
+	  rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost_nomerge_base = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_WIENER],
-      rsc->cm->seq_params.bit_depth);
+	  x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_WIENER],
+	  rsc->cm->seq_params.bit_depth);
+#endif
   const int bits_min = x->mode_costs.wiener_restore_cost[1] +
                        x->mode_costs.merged_param_cost[1] +
                        (1 << AV1_PROB_COST_SHIFT);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  const double cost_min = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_min >> 4, rusi->sse[RESTORE_WIENER],
+	  rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   const double cost_min = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_min >> 4, rusi->sse[RESTORE_WIENER],
-      rsc->cm->seq_params.bit_depth);
+	  x->rdmult, bits_min >> 4, rusi->sse[RESTORE_WIENER],
+	  rsc->cm->seq_params.bit_depth);
+#endif
   const double cost_nomerge_thr = (cost_nomerge_base + 3 * cost_min) / 4;
   RestorationType rtype =
       (cost_none <= cost_nomerge_thr) ? RESTORE_NONE : RESTORE_WIENER;
@@ -2235,10 +2352,15 @@
       if (old_rusi->best_rtype[RESTORE_WIENER - 1] == RESTORE_WIENER &&
           !check_wiener_eq(&old_rusi->wiener_info, ref_wiener_info_cand))
         continue;
-
-      cost_nomerge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST(
-          x->rdmult, old_unit->current_bits >> 4, old_unit->current_sse,
-          rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+	  cost_nomerge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+		  x->rdmult, old_unit->current_bits >> 4, old_unit->current_sse,
+		  rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+	  cost_nomerge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST(
+		  x->rdmult, old_unit->current_bits >> 4, old_unit->current_sse,
+		  rsc->cm->seq_params.bit_depth);
+#endif
       for (int index = 0; index < WIENER_WIN2; ++index) {
         M_AVG[index] += old_unit->M[index] / current_unit_indices->size;
       }
@@ -2303,9 +2425,15 @@
         old_unit->merge_bits_cand =
             x->mode_costs.wiener_restore_cost[1] + merge_bits;
       }
-      cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST(
-          x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
-          rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+	  cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+		  x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
+		  rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+	  cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST(
+		  x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
+		  rsc->cm->seq_params.bit_depth);
+#endif
     }
     if (cost_merge_cand - cost_nomerge_cand < cost_merge - cost_nomerge) {
       begin_idx = begin_idx_cand;
@@ -2394,10 +2522,15 @@
       x->mode_costs.wiener_restore_cost[1] +
       count_wiener_bits(wiener_win, &x->mode_costs, &rusi->wiener_info,
                         &rsc->wiener_bank);
-
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_wiener = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER],
+	  rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost_wiener = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER],
-      rsc->cm->seq_params.bit_depth);
+	  x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER],
+	  rsc->cm->seq_params.bit_depth);
+#endif
 
   RestorationType rtype =
       (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
@@ -2647,8 +2780,13 @@
       count_wienerns_bits(rsc->plane, &x->mode_costs, &curr, ref_wienerns_bank,
                           nsfilter_params, wiener_class_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double best_cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, best_bits >> 4, best_err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double best_cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, best_bits >> 4, best_err, rsc->cm->seq_params.bit_depth);
+	  x->rdmult, best_bits >> 4, best_err, rsc->cm->seq_params.bit_depth);
+#endif
 
   int is_uv = (rui->plane != AOM_PLANE_Y);
   const int beg_feat = 0;
@@ -2692,8 +2830,13 @@
               rsc->plane, &x->mode_costs, &rui->wienerns_info,
               ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-          const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-              x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		  const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			  x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		  const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			  x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
           if (cost < best_cost) {
             no_improv = 0;
             best_err = err;
@@ -2741,8 +2884,13 @@
             count_wienerns_bits(rsc->plane, &x->mode_costs, &rui->wienerns_info,
                                 ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-        const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-            x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
         if (cost < best_cost) {
           best_err = err;
           best_cost = cost;
@@ -2769,8 +2917,13 @@
             count_wienerns_bits(rsc->plane, &x->mode_costs, &rui->wienerns_info,
                                 ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-        const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-            x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
         if (cost < best_cost) {
           best_err = err;
           best_cost = cost;
@@ -2801,8 +2954,13 @@
             count_wienerns_bits(rsc->plane, &x->mode_costs, &rui->wienerns_info,
                                 ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-        const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-            x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
         if (cost < best_cost) {
           best_err = err;
           best_cost = cost;
@@ -2837,8 +2995,13 @@
             count_wienerns_bits(rsc->plane, &x->mode_costs, &rui->wienerns_info,
                                 ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-        const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-            x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
         if (cost < best_cost) {
           best_err = err;
           best_cost = cost;
@@ -2864,8 +3027,13 @@
             count_wienerns_bits(rsc->plane, &x->mode_costs, &rui->wienerns_info,
                                 ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-        const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-            x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
         if (cost < best_cost) {
           best_err = err;
           best_cost = cost;
@@ -2894,8 +3062,13 @@
             count_wienerns_bits(rsc->plane, &x->mode_costs, &rui->wienerns_info,
                                 ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-        const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-            x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
         if (cost < best_cost) {
           best_err = err;
           best_cost = cost;
@@ -2928,8 +3101,13 @@
             count_wienerns_bits(rsc->plane, &x->mode_costs, &rui->wienerns_info,
                                 ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-        const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-            x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
         if (cost < best_cost) {
           best_err = err;
           best_cost = cost;
@@ -2992,8 +3170,13 @@
               rsc->plane, &x->mode_costs, &rui->wienerns_info,
               ref_wienerns_bank, nsfilter_params, c_id);
 #endif  // CONFIG_LR_MERGE_COEFFS
-          const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-              x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		  const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+			  x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		  const double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+			  x->rdmult, bits >> 4, err, rsc->cm->seq_params.bit_depth);
+#endif
           if (cost < best_cost) {
             no_improv = 0;
             best_err = err;
@@ -3342,9 +3525,15 @@
             rui->plane, &rsc->x->mode_costs, &rui->wienerns_info,
             &rsc->wienerns_bank, nsfilter_params, ALL_WIENERNS_CLASSES);
 #endif  // CONFIG_LR_MERGE_COEFFS
-        double cost =
-            RDCOST_DBL_WITH_NATIVE_BD_DIST(rsc->x->rdmult, bits >> 4, real_errq,
-                                           rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+		double cost =
+			RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(rsc->x->rdmult, bits >> 4, real_errq,
+				rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+		double cost =
+			RDCOST_DBL_WITH_NATIVE_BD_DIST(rsc->x->rdmult, bits >> 4, real_errq,
+				rsc->cm->seq_params.bit_depth);
+#endif
         if (cost < best_cost) {
           best_cost = cost;
           copy_nsfilter_taps(&best, &rui->wienerns_info);
@@ -3435,6 +3624,11 @@
   double cost_merge_cand = 0;
   int equal_ref_for_class[WIENERNS_MAX_CLASSES] = { 0 };
   rui_merge_cand->wiener_class_id_restrict = wiener_class_id;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+    rsc->skip_acc_txskip_flag = (rsc->cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0;
+    rui_merge_cand->skip_acc_txskip_flag = rsc->skip_acc_txskip_flag;
+#endif  
+
   bool has_begun = false;
   VECTOR_FOR_EACH(current_unit_stack, listed_unit) {
     RstUnitSnapshot *old_unit = (RstUnitSnapshot *)(listed_unit.pointer);
@@ -3503,9 +3697,15 @@
       old_unit->merge_bits_cand =
           x->mode_costs.wienerns_restore_cost[1] + merge_bits;
     }
-    cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST(
-        x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
-        bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+	cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+		x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
+		bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+	cost_merge_cand += RDCOST_DBL_WITH_NATIVE_BD_DIST(
+		x->rdmult, old_unit->merge_bits_cand >> 4, old_unit->merge_sse_cand,
+		bit_depth);
+#endif
   }
   return cost_merge_cand;
 }
@@ -3537,10 +3737,15 @@
         !check_wienerns_eq(&old_rusi->wienerns_info, ref_wienerns_info_cand,
                            nsfilter_params->ncoeffs, wiener_class_id))
       continue;
-
-    cost_nomerge_cand +=
-        RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult, old_unit->current_bits >> 4,
-                                       old_unit->current_sse, bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+	cost_nomerge_cand +=
+		RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(x->rdmult, old_unit->current_bits >> 4,
+			old_unit->current_sse, bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+	cost_nomerge_cand +=
+		RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult, old_unit->current_bits >> 4,
+			old_unit->current_sse, bit_depth);
+#endif
 
     for (int index = 0; index < dim_A; ++index) {
       solver_A_AVG[index] += old_unit->A[index + offset_A];
@@ -3623,8 +3828,13 @@
   const MACROBLOCK *const x = rsc->x;
   const int64_t bits_none = x->mode_costs.wienerns_restore_cost[0];
   const int bit_depth = rsc->cm->seq_params.bit_depth;
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth);
+	  x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE], bit_depth);
+#endif
 
   RestorationUnitInfo rui;
   initialize_rui_for_nonsep_search(rsc, &rui);
@@ -3699,14 +3909,25 @@
   memcpy(rui.wienerns_info.bank_ref_for_class,
          rusi->wienerns_info.bank_ref_for_class,
          num_classes * sizeof(*ns_bank_ref_base));
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_nomerge_base = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_WIENER_NONSEP],
+	  bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost_nomerge_base = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_WIENER_NONSEP],
-      bit_depth);
+	  x->rdmult, bits_nomerge_base >> 4, rusi->sse[RESTORE_WIENER_NONSEP],
+	  bit_depth);
+#endif
   const int bits_min = x->mode_costs.wienerns_restore_cost[1] +
                        x->mode_costs.merged_param_cost[1] +
                        (1 << AV1_PROB_COST_SHIFT);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  const double cost_min = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_min >> 4, rusi->sse[RESTORE_WIENER_NONSEP], bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   const double cost_min = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_min >> 4, rusi->sse[RESTORE_WIENER_NONSEP], bit_depth);
+	  x->rdmult, bits_min >> 4, rusi->sse[RESTORE_WIENER_NONSEP], bit_depth);
+#endif
   const double cost_nomerge_thr = (cost_nomerge_base + 3 * cost_min) / 4;
   const RestorationType rtype =
       (cost_none <= cost_nomerge_thr) ? RESTORE_NONE : RESTORE_WIENER_NONSEP;
@@ -4005,9 +4226,15 @@
       count_wienerns_bits(rui.plane, &x->mode_costs, &rusi->wienerns_info,
                           &rsc->wienerns_bank, nsfilter_params,
                           ALL_WIENERNS_CLASSES);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  double cost_wienerns = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(
+	  x->rdmult, bits_wienerns >> 4, rusi->sse[RESTORE_WIENER_NONSEP],
+	  bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   double cost_wienerns = RDCOST_DBL_WITH_NATIVE_BD_DIST(
-      x->rdmult, bits_wienerns >> 4, rusi->sse[RESTORE_WIENER_NONSEP],
-      bit_depth);
+	  x->rdmult, bits_wienerns >> 4, rusi->sse[RESTORE_WIENER_NONSEP],
+	  bit_depth);
+#endif
   const RestorationType rtype =
       (cost_wienerns < cost_none) ? RESTORE_WIENER_NONSEP : RESTORE_NONE;
   rusi->best_rtype[RESTORE_WIENER_NONSEP - 1] = rtype;
@@ -4142,8 +4369,13 @@
 
     const int64_t sse = rusi->sse[r];
     int64_t bits = count_switchable_bits(r, rsc, rusi);
-    double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult, bits >> 4, sse,
-                                                 rsc->cm->seq_params.bit_depth);
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+	double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(x->rdmult, bits >> 4, sse,
+		rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
+	double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult, bits >> 4, sse,
+		rsc->cm->seq_params.bit_depth);
+#endif
     if (r == RESTORE_SGRPROJ && rusi->sgrproj_info.ep < 10)
       cost *= (1 + DUAL_SGR_PENALTY_MULT * rsc->lpf_sf->dual_sgr_penalty_level);
     if (r == 0 || cost < best_cost) {
@@ -4347,7 +4579,11 @@
                                int *processed, rest_unit_visitor_t fun) {
   const int is_uv = rsc->plane > 0;
   const int ss_y = is_uv && rsc->cm->seq_params.subsampling_y;
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  RestorationInfo *rsi = &rsc->cm->rst_info[rsc->plane];
+#else
   const RestorationInfo *rsi = &rsc->cm->rst_info[rsc->plane];
+#endif  
   const int ru_size = rsi->restoration_unit_size;
   TileInfo tile_info;
   av1_tile_set_row(&tile_info, rsc->cm, tile_row);
@@ -4355,12 +4591,28 @@
   assert(tile_info.mi_row_start < tile_info.mi_row_end);
   assert(tile_info.mi_col_start < tile_info.mi_col_end);
 
+#if CONFIG_2D_SR_RESTORATION_BUG_FIX
+  int scaled_mi_row_end = av1_superres_scaled(rsc->cm) ? tile_info.mi_row_end * rsc->cm->superres_scale_denominator / SCALE_NUMERATOR : tile_info.mi_row_end;
+  int scaled_mi_col_end = av1_superres_scaled(rsc->cm) ? tile_info.mi_col_end * rsc->cm->superres_scale_denominator / SCALE_NUMERATOR : tile_info.mi_col_end;
+#endif  // CONFIG_2D_SR_RESTORATION_BUG_FIX
+
   reset_rsc(rsc);
   rsc_on_tile(rsc, *processed);
+ #if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  rsc->skip_acc_txskip_flag = (rsc->cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0;
+  rsi->skip_acc_txskip_flag = rsc->skip_acc_txskip_flag;
+#endif        
+#if CONFIG_2D_SR_RESTORATION_BUG_FIX
+  for (int mi_row = tile_info.mi_row_start; mi_row < scaled_mi_row_end;
+       mi_row += rsc->cm->seq_params.mib_size) {
+    for (int mi_col = tile_info.mi_col_start; mi_col < scaled_mi_col_end;
+         mi_col += rsc->cm->seq_params.mib_size) {
+#else  // CONFIG_2D_SR_RESTORATION_BUG_FIX
   for (int mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
        mi_row += rsc->cm->seq_params.mib_size) {
     for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
          mi_col += rsc->cm->seq_params.mib_size) {
+#endif  // CONFIG_2D_SR_RESTORATION_BUG_FIX
       int rrow0, rrow1, rcol0, rcol1;
       if (av1_loop_restoration_corners_in_sb(
               rsc->cm, rsc->plane, mi_row, mi_col, rsc->cm->seq_params.sb_size,
@@ -4403,9 +4655,15 @@
       total_sse += rsc->sse;
     }
   }
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+  return RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(rsc->x->rdmult, total_bits >> 4,
+	  total_sse,
+	  rsc->cm->seq_params.bit_depth, av1_superres_scaled(rsc->cm) ? (rsc->cm->superres_scale_denominator << 1) / SCALE_NUMERATOR : 2);
+#else
   return RDCOST_DBL_WITH_NATIVE_BD_DIST(rsc->x->rdmult, total_bits >> 4,
-                                        total_sse,
-                                        rsc->cm->seq_params.bit_depth);
+	  total_sse,
+	  rsc->cm->seq_params.bit_depth);
+#endif
 }
 
 static void gather_stats_rest_type(RestSearchCtxt *rsc, RestorationType rtype) {
@@ -4424,7 +4682,6 @@
 #if CONFIG_WIENER_NONSEP
   if (rtype == RESTORE_WIENER_NONSEP) aom_vector_clear(rsc->wienerns_stats);
 #endif  // CONFIG_WIENER_NONSEP
-
   if (funs[rtype]) process_by_rutile(rsc, funs[rtype]);
 }
 
@@ -4580,6 +4837,11 @@
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
 #endif  // CONFIG_WIENER_NONSEP
 
+
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  rsc.skip_acc_txskip_flag = (cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0;
+#endif
+
 #if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   rsc.is_cross_filter_round = 0;
 #endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
@@ -4858,6 +5120,10 @@
   assert(luma_buf != NULL);
   rsc.luma = luma;
 
+#if CONFIG_2D_SR_SET_TX_SKIP_ZERO
+  rsc.skip_acc_txskip_flag = (cm->superres_scale_denominator != SCALE_NUMERATOR) ? 1 : 0;
+#endif  // CONFIG_2D_SR_SET_TX_SKIP_ZERO
+
   rsc.is_cross_filter_round = 1;
 
   for (int plane = plane_start; plane <= plane_end; ++plane) {
diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 72ce6dd..be28e05 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c
@@ -989,9 +989,18 @@
   if (rc_cfg->mode == AOM_CQ || rc_cfg->mode == AOM_Q) {
     // printf("Superres %d %d %d = %d\n", superres_denom, intra_only,
     //        rc->frames_to_key, !(intra_only && rc->frames_to_key <= 1));
+
     if ((superres_mode == AOM_SUPERRES_QTHRESH ||
          superres_mode == AOM_SUPERRES_AUTO) &&
         superres_denom != SCALE_NUMERATOR) {
+#if CONFIG_2D_SR
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+      active_qp = rc_cfg->qp;
+#else
+      active_qp =
+          AOMMAX(active_qp - ((superres_denom - SCALE_NUMERATOR) * 3), 0);
+#endif  // CONFIG_2D_SR_FRAME_WISE_SWITCHING
+#else   // CONFIG_2D_SR
       int mult = SUPERRES_QADJ_PER_DENOM_KEYFRAME_SOLO;
       if (intra_only && rc->frames_to_key <= 1) {
         mult = 0;
@@ -1002,7 +1011,9 @@
       }
       active_qp =
           AOMMAX(active_qp - ((superres_denom - SCALE_NUMERATOR) * mult), 0);
+#endif  // CONFIG_2D_SR
     }
+
   }
   if (rc_cfg->mode == AOM_CQ && rc->total_target_bits > 0) {
     const double x = (double)rc->total_actual_bits / rc->total_target_bits;
@@ -1687,6 +1698,14 @@
     q = rc_pick_q_and_bounds(cpi, width, height, gf_index, bottom_index,
                              top_index, &rc->level1_qp);
   }
+
+#if CONFIG_2D_SR_FRAME_WISE_SWITCHING
+    if (cpi->superres_mode == AOM_SUPERRES_AUTO &&
+        cpi->common.superres_scale_denominator != SCALE_NUMERATOR) {
+      q = AOMMAX(q - ((int)(log2(((double)cpi->common.superres_scale_denominator) / 4) * 23)), 0);
+    }
+#endif
+
   if (gf_group->update_type[gf_index] == ARF_UPDATE ||
       gf_group->update_type[gf_index] == KFFLT_UPDATE)
     rc->level1_qp = q;
diff --git a/av1/encoder/ratectrl.h b/av1/encoder/ratectrl.h
index 420d6eb..098ea93 100644
--- a/av1/encoder/ratectrl.h
+++ b/av1/encoder/ratectrl.h
@@ -55,6 +55,9 @@
   int resize_width;
   int resize_height;
   uint8_t superres_denom;
+#if CONFIG_2D_SR
+  uint8_t superres_num;
+#endif  // CONFIG_2D_SR
 } size_params_type;
 
 enum {
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index a35933c..7887802 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -1788,9 +1788,22 @@
     return NULL;
   }
 #endif  // CONFIG_TIP
-
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+  if (ref_frame >= cpi->common.ref_frames_info.num_total_refs) {
+	  return NULL;
+  }
+#else
   assert(ref_frame < cpi->common.ref_frames_info.num_total_refs);
+#endif
+
+#if CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT
+  const uint8_t denom = cpi->oxcf.superres_cfg.superres_scale_denominator;
+  const int scale_index = (denom == SCALE_NUMERATOR) ? SUPERRES_SCALES : to_scale_index(cpi, denom);
+
+  RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame * (SUPERRES_SCALES + 1) + scale_index];
+#else
   RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame];
+#endif
   const RefCntBuffer *const ref_buf =
       get_ref_frame_buf(&cpi->common, ref_frame);
   return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index be21280..0e5a70a 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -40,6 +40,13 @@
   (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
    ((double)((D) >> (2 * (BD - 8))) * (1 << RDDIV_BITS)))
 
+#if CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE
+#define RDCOST_DBL_WITH_NATIVE_BD_DIST_SCALE(RM, R, D, BD, S)                  \
+  (assert((S) > 1), ((((double)(R)) * (RM) * (S) * (S) / 4) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
+   ((double)((D) >> (2 * (BD - 8))) * (1 << RDDIV_BITS)))
+#endif
+
+
 #define QIDX_SKIP_THRESH 115
 
 #define MV_COST_WEIGHT 108
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index df98b4f..a4195f6 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -830,7 +830,20 @@
   }
 
 #if CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_2D_SR_REF_MVS_INTER_FIX
+  if (mbmi->skip_mode) {
+    // Go back to unscaled reference.
+    if (scaled_ref_frame) {
+      // We had temporarily set up the pred block based on the scaled reference
+      // above. Go back to the unscaled reference now, for subsequent use.
+      av1_setup_pred_block(xd, yv12_mb[ref_frame_idx], yv12, sf, sf,
+        num_planes);
+    }
+    return;
+}
+#else
   if (mbmi->skip_mode) return;
+#endif
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
   // Gets an initial list of candidate vectors from neighbours and orders them
@@ -2187,7 +2200,11 @@
 #if CONFIG_FLEX_MVRES
                                                 pb_mv_precision,
 #endif
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                                NULL, 0);
+#else
                                                 NULL);
+#endif
               // Refine MV in a small range.
               av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                                    total_samples,
@@ -2300,8 +2317,12 @@
 #if CONFIG_FLEX_MVRES
                                             mbmi->pb_mv_precision,
 #endif
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                            NULL, 0);
+#else
                                             NULL);
-          int valid = 0;
+#endif
+          int valid = 0;
 #if CONFIG_WARP_REF_LIST
           if (!allow_warp_parameter_signaling(
 #if CONFIG_CWG_D067_IMPROVED_WARP
@@ -2439,7 +2460,11 @@
 #if CONFIG_FLEX_MVRES
                                               mbmi->pb_mv_precision,
 #endif
-                                              NULL);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+			                                        NULL, 0);
+#else
+			                                        NULL);
+#endif
             const SubpelMvLimits *mv_limits = &ms_params.mv_limits;
 
             // Note: The warp filter is only able to accept small deviations
@@ -3414,6 +3439,7 @@
   const int drl_cost =
       get_drl_cost(cpi->common.features.max_drl_bits, mbmi, mbmi_ext, x);
   est_rd_rate += drl_cost;
+
   if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd) {
     return true;
   }
@@ -3719,11 +3745,15 @@
   if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
   // Do not prune when there is internal resizing. TODO(elliottk) fix this
   // so b/2384 can be resolved.
+#if CONFIG_2D_SR_USE_GOOD_INDICES
+  return good_indices;
+#else
   if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
       (is_inter_ref_frame(mbmi->ref_frame[1]) &&
        av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
     return good_indices;
   }
+#endif
 
   // Calculate the RD cost for the motion vectors using simple translation.
 #if CONFIG_SEP_COMP_DRL
@@ -4470,8 +4500,14 @@
   const int bw = block_size_wide[bsize];
   const int bh = block_size_high[bsize];
   // get inter predictors to use for masked compound modes
+#if CONFIG_2D_SR_MC_PHASE_FIX
+  const AV1_COMMON *const cm = &cpi->common;
+  av1_build_inter_predictor_single_buf_y(xd, bsize, 0, p0, stride, cm);
+  av1_build_inter_predictor_single_buf_y(xd, bsize, 1, p1, stride, cm);
+#else
   av1_build_inter_predictor_single_buf_y(xd, bsize, 0, p0, stride);
   av1_build_inter_predictor_single_buf_y(xd, bsize, 1, p1, stride);
+#endif
   const struct buf_2d *const src = &x->plane[0].src;
 
   aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, p1,
@@ -4480,7 +4516,9 @@
 
   MB_MODE_INFO *const mbmi = xd->mi[0];
 
+#if !CONFIG_2D_SR_MC_PHASE_FIX
   const AV1_COMMON *const cm = &cpi->common;
+#endif
   const int same_side = is_ref_frame_same_side(cm, mbmi);
 
   const int N = 1 << num_pels_log2_lookup[bsize];
@@ -5406,7 +5444,11 @@
                       lower_mv_precision(&ref_mv, mbmi->pb_mv_precision);
                     av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                                       &ref_mv, pb_mv_precision,
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                                      NULL, ref);
+#else
                                                       NULL);
+#endif
                     if (!av1_is_subpelmv_in_range(&ms_params.mv_limits,
                                                   cur_mv[ref].as_mv)) {
                       mv_outlim = 1;
@@ -6271,7 +6313,11 @@
                                      is_ibc_cost,
 #endif
                                      lookahead_search_sites,
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                     /*fine_search_interval=*/0, 0);
+#else
                                      /*fine_search_interval=*/0);
+#endif
 #else
   av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                      &dv_ref.as_mv, lookahead_search_sites,
@@ -8243,6 +8289,7 @@
     InterModeSearchState *search_state, int skip_ref_frame_mask,
 #endif  // CONFIG_ALLOW_SAME_REF_COMPOUND
     PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
+
   if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
     return 1;
   }
diff --git a/av1/encoder/reconinter_enc.c b/av1/encoder/reconinter_enc.c
index 655fcec..b6aaa79 100644
--- a/av1/encoder/reconinter_enc.c
+++ b/av1/encoder/reconinter_enc.c
@@ -88,6 +88,22 @@
     pos_x += SCALE_EXTRA_OFF;
     pos_y += SCALE_EXTRA_OFF;
 
+#if CONFIG_2D_SR_ZERO_PHASE
+    // TODO: Determine plane type from something other than ssx, ssy
+    if (sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += (ssx == 1) ? inter_pred_params->posx_offset[1]
+                          : inter_pred_params->posx_offset[0];
+    }
+    if (sf->y_scale_fp != REF_NO_SCALE) {
+      pos_y += (ssy == 1) ? inter_pred_params->posy_offset[1]
+                          : inter_pred_params->posy_offset[0];
+    }
+#elif CONFIG_2D_SR_MC_PHASE_FIX
+    if (ssx == 1 && sf->x_scale_fp != REF_NO_SCALE) {
+      pos_x += inter_pred_params->posx_offset[1];
+    }
+#endif
+
     const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
     const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
     const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
@@ -96,11 +112,116 @@
     pos_y = clamp(pos_y, top, bottom);
     pos_x = clamp(pos_x, left, right);
 
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      const int bw = use_optflow_refinement
+                         ? inter_pred_params->orig_block_width
+                         : inter_pred_params->block_width;
+      const int bh = use_optflow_refinement ? inter_pred_params->orig_block_height : inter_pred_params->block_height;
+
+      MV mv_q4;
+      if (use_optflow_refinement) {
+        // optflow refinement always returns MVs with 1/16 precision so it is
+        // not necessary to shift the MV before clamping
+        mv_q4.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED(src_mv->row * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + inter_pred_params->subsampling_y);
+        mv_q4.col = (int16_t)ROUND_POWER_OF_TWO_SIGNED(src_mv->col * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + inter_pred_params->subsampling_x);
+      } else {
+        mv_q4.row = (int16_t)(src_mv->row * (1 << (1 - inter_pred_params->subsampling_y)));
+        mv_q4.col = (int16_t)(src_mv->col * (1 << (1 - inter_pred_params->subsampling_x)));
+      }
+
+      int mi_row = inter_pred_params->pix_row / (MI_SIZE >> inter_pred_params->subsampling_y);
+      int mi_col = inter_pred_params->pix_col / (MI_SIZE >> inter_pred_params->subsampling_x);
+      int mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE);
+      int mb_to_bottom_edge = GET_MV_SUBPEL((inter_pred_params->mi_rows - mi_row) * MI_SIZE - bh);
+      int mb_to_left_edge = -GET_MV_SUBPEL((mi_col * MI_SIZE));
+      int mb_to_right_edge = GET_MV_SUBPEL((inter_pred_params->mi_cols - mi_col) * MI_SIZE - bw);
+
+      const int spel_left = (AOM_INTERP_EXTEND + bw) << SUBPEL_BITS;
+      const int spel_right = spel_left - SUBPEL_SHIFTS;
+      const int spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS;
+      const int spel_bottom = spel_top - SUBPEL_SHIFTS;
+
+      const SubpelMvLimits mv_limits = {
+        mb_to_left_edge * (1 << (1 - inter_pred_params->subsampling_x)) - spel_left,
+        mb_to_right_edge * (1 << (1 - inter_pred_params->subsampling_x)) + spel_right,
+        mb_to_top_edge * (1 << (1 - inter_pred_params->subsampling_y)) - spel_top,
+        mb_to_bottom_edge * (1 << (1 - inter_pred_params->subsampling_y)) + spel_bottom
+      };
+
+      clamp_mv(&mv_q4, &mv_limits);
+
+      int subbpel_pos_x = ((inter_pred_params->pix_col << SUBPEL_BITS) + mv_q4.col) << SCALE_EXTRA_BITS;
+      int subbpel_pos_y = ((inter_pred_params->pix_row << SUBPEL_BITS) + mv_q4.row) << SCALE_EXTRA_BITS;
+      subpel_params->subpel_x = subbpel_pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = subbpel_pos_y & SCALE_SUBPEL_MASK;
+    } else {
+      subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+      subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+    }
+#else
     subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
     subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+#endif
+
     subpel_params->xs = sf->x_step_q4;
     subpel_params->ys = sf->y_step_q4;
+#if CONFIG_2D_SR_PHASE_ADJUSTMENT
+    if ((sf->x_scale_fp == sf->y_scale_fp) && ((sf->x_scale_fp == REF_2x_SCALE) || (sf->x_scale_fp == REF_3x_SCALE) || (sf->x_scale_fp == REF_4x_SCALE) || (sf->x_scale_fp == REF_6x_SCALE))) {
+      int scale = 0;
+      if (sf->x_scale_fp == REF_NO_SCALE) scale = 1;
+      if (sf->x_scale_fp == REF_2x_SCALE) scale = 2;
+      if (sf->x_scale_fp == REF_3x_SCALE) scale = 3;
+      if (sf->x_scale_fp == REF_4x_SCALE) scale = 4;
+      if (sf->x_scale_fp == REF_6x_SCALE) scale = 6;
+      assert(scale != 0);
+      inter_pred_params->conv_params.stride_scale = scale;
 
+      orig_pos_y = clamp(((orig_pos_y >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale,
+                         top, bottom);
+      orig_pos_x = clamp(((orig_pos_x >> SUBPEL_BITS) << SCALE_SUBPEL_BITS) * scale,
+                         left, right);
+#if CONFIG_D071_IMP_MSK_BLD
+      if (inter_pred_params->border_data.enable_bacp) {
+        // Get reference block top left coordinate.
+        subpel_params->x0 = orig_pos_x >> SCALE_SUBPEL_BITS;
+        subpel_params->y0 = orig_pos_y >> SCALE_SUBPEL_BITS;
+        // Get reference block bottom right coordinate.
+        subpel_params->x1 =
+          ((orig_pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+            SCALE_SUBPEL_BITS) +
+          scale;
+        subpel_params->y1 = ((orig_pos_y + (inter_pred_params->block_height - 1) *
+          subpel_params->ys) >>
+          SCALE_SUBPEL_BITS) +
+          scale;
+    }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+      *pre = pre_buf->buf0 +
+             (orig_pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+             (orig_pos_x >> SCALE_SUBPEL_BITS);
+    } else {
+      inter_pred_params->conv_params.stride_scale = 1;
+#if CONFIG_D071_IMP_MSK_BLD
+      if (inter_pred_params->border_data.enable_bacp) {
+        // Get reference block top left coordinate.
+        subpel_params->x0 = pos_x >> SCALE_SUBPEL_BITS;
+        subpel_params->y0 = pos_y >> SCALE_SUBPEL_BITS;
+        // Get reference block bottom right coordinate.
+        subpel_params->x1 =
+          ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+            SCALE_SUBPEL_BITS) +
+          1;
+        subpel_params->y1 = ((pos_y + (inter_pred_params->block_height - 1) *
+          subpel_params->ys) >>
+          SCALE_SUBPEL_BITS) +
+          1;
+      }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+      *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
+             (pos_x >> SCALE_SUBPEL_BITS);
+    }
+#else
 #if CONFIG_D071_IMP_MSK_BLD
     if (inter_pred_params->border_data.enable_bacp) {
       // Get reference block top left coordinate.
@@ -108,18 +229,18 @@
       subpel_params->y0 = pos_y >> SCALE_SUBPEL_BITS;
       // Get reference block bottom right coordinate.
       subpel_params->x1 =
-          ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
-           SCALE_SUBPEL_BITS) +
-          1;
+        ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+          SCALE_SUBPEL_BITS) +
+        1;
       subpel_params->y1 = ((pos_y + (inter_pred_params->block_height - 1) *
-                                        subpel_params->ys) >>
-                           SCALE_SUBPEL_BITS) +
-                          1;
+        subpel_params->ys) >>
+        SCALE_SUBPEL_BITS) +
+        1;
     }
 #endif  // CONFIG_D071_IMP_MSK_BLD
-
     *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
            (pos_x >> SCALE_SUBPEL_BITS);
+#endif
 #if CONFIG_OPTFLOW_REFINEMENT || CONFIG_EXT_RECUR_PARTITIONS
   } else {
     int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
@@ -212,7 +333,12 @@
                              enc_calc_subpel_params);
 }
 
+#if CONFIG_2D_SR_MC_PHASE_FIX
+void av1_enc_build_inter_predictor_y(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                     const struct AV1Common *const cm) {
+#else
 void av1_enc_build_inter_predictor_y(MACROBLOCKD *xd, int mi_row, int mi_col) {
+#endif
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
   struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
@@ -226,6 +352,9 @@
   av1_init_inter_params(&inter_pred_params, pd->width, pd->height, mi_y, mi_x,
                         pd->subsampling_x, pd->subsampling_y, xd->bd, false, sf,
                         pd->pre, xd->mi[0]->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+  av1_init_phase_offset(&inter_pred_params, cm);
+#endif
 
   inter_pred_params.conv_params = get_conv_params_no_round(
       0, AOM_PLANE_Y, xd->tmp_conv_dst, MAX_SB_SIZE, false, xd->bd);
@@ -379,6 +508,9 @@
         &inter_pred_params, bw, bh, mi_y >> pd->subsampling_y,
         mi_x >> pd->subsampling_x, pd->subsampling_x, pd->subsampling_y, xd->bd,
         0, xd->block_ref_scale_factors[0], pre_buf, above_mbmi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, ctxt->cm);
+#endif
     inter_pred_params.conv_params = get_conv_params(0, j, xd->bd);
 
     av1_enc_build_one_inter_predictor(pd->dst.buf, pd->dst.stride, &mv,
@@ -440,9 +572,16 @@
                                   dst_stride2);
 }
 
+#if CONFIG_2D_SR_MC_PHASE_FIX
+void av1_build_inter_predictor_single_buf_y(MACROBLOCKD *xd, BLOCK_SIZE bsize,
+                                            int ref, uint16_t *dst,
+                                            int ext_dst_stride,
+                                            const struct AV1Common *const cm) {
+#else
 void av1_build_inter_predictor_single_buf_y(MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                             int ref, uint16_t *dst,
                                             int ext_dst_stride) {
+#endif
   assert(bsize < BLOCK_SIZES_ALL);
   const MB_MODE_INFO *mi = xd->mi[0];
   const int mi_row = xd->mi_row;
@@ -470,6 +609,10 @@
       &inter_pred_params, bw, bh, mi_y >> pd->subsampling_y,
       mi_x >> pd->subsampling_x, pd->subsampling_x, pd->subsampling_y, xd->bd,
       0, xd->block_ref_scale_factors[ref], &pd->pre[ref], mi->interp_fltr);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+  av1_init_phase_offset(&inter_pred_params, cm);
+#endif
+
   inter_pred_params.conv_params = get_conv_params(0, plane, xd->bd);
   av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);
 
diff --git a/av1/encoder/reconinter_enc.h b/av1/encoder/reconinter_enc.h
index 5f3de5f..b9d7976 100644
--- a/av1/encoder/reconinter_enc.h
+++ b/av1/encoder/reconinter_enc.h
@@ -32,7 +32,12 @@
                                    const BUFFER_SET *ctx, BLOCK_SIZE bsize,
                                    int plane_from, int plane_to);
 
+#if CONFIG_2D_SR_MC_PHASE_FIX
+void av1_enc_build_inter_predictor_y(MACROBLOCKD *xd, int mi_row, int mi_col,
+                                     const struct AV1Common *const cm);
+#else
 void av1_enc_build_inter_predictor_y(MACROBLOCKD *xd, int mi_row, int mi_col);
+#endif
 
 void enc_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                 int plane, MB_MODE_INFO *mi,
@@ -65,9 +70,16 @@
 
 void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd);
 
+#if CONFIG_2D_SR_MC_PHASE_FIX
+void av1_build_inter_predictor_single_buf_y(MACROBLOCKD *xd, BLOCK_SIZE bsize,
+                                            int ref, uint16_t *dst,
+                                            int ext_dst_stride,
+                                            const struct AV1Common *const cm);
+#else
 void av1_build_inter_predictor_single_buf_y(MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                             int ref, uint16_t *ext_dst,
                                             int ext_dst_stride);
+#endif
 
 void av1_build_wedge_inter_predictor_from_buf_y(
     MACROBLOCKD *xd, BLOCK_SIZE bsize, uint16_t *ext_dst0, int ext_dst_stride0,
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 68fa3ee..4de408e 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -395,7 +395,11 @@
   }
 
   sf->rd_sf.perform_coeff_opt = 1;
+#if CONFIG_2D_SR
+  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_ALL;
+#else   // CONFIG_2D_SR
   sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;
+#endif  // CONFIG_2D_SR
 
   if (speed >= 1) {
 #if CONFIG_EXT_RECUR_PARTITIONS
@@ -1165,9 +1169,14 @@
     sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
 
   // No recode or trellis for 1 pass.
+#if CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x
+  if ((oxcf->pass == 0 && has_no_stats_stage(cpi)) || oxcf->superres_cfg.superres_mode == AOM_SUPERRES_AUTO)
+#else
   if (oxcf->pass == 0 && has_no_stats_stage(cpi))
+#endif
     sf->hl_sf.recode_loop = DISALLOW_RECODE;
 
+
   MotionVectorSearchParams *const mv_search_params = &cpi->mv_search_params;
   if (sf->mv_sf.subpel_search_method == SUBPEL_TREE) {
     mv_search_params->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
diff --git a/av1/encoder/superres_scale.c b/av1/encoder/superres_scale.c
index 0c5c9e0..24fae16 100644
--- a/av1/encoder/superres_scale.c
+++ b/av1/encoder/superres_scale.c
@@ -14,6 +14,59 @@
 #include "av1/encoder/superres_scale.h"
 #include "av1/encoder/random.h"
 
+#if CONFIG_2D_SR
+// Compute the down-up mse for each denominator
+static void analyze_downup_mse(const AV1_COMP *cpi, double *mse) {
+  const YV12_BUFFER_CONFIG *buf = cpi->unscaled_source;
+  const int bd = cpi->td.mb.e_mbd.bd;
+  const int size = buf->y_crop_width * buf->y_crop_height;
+  for (int this_index = 0; this_index < SUPERRES_SCALES; ++this_index) {
+    const int denom = superres_scales[this_index].scale_denom;
+    const int64_t sse = av1_downup_lanczos_sse(buf, bd, denom, SCALE_NUMERATOR);
+    mse[this_index] = (double)sse / size;
+  }
+  const int64_t sse =
+      av1_downup_lanczos_sse(buf, bd, SCALE_NUMERATOR * 4, SCALE_NUMERATOR);
+  mse[SUPERRES_SCALES] = (double)sse / size;
+}
+
+#define SUPERRES_DOWNUPMSE_BY_Q2_THRESH_KEYFRAME_SOLO 0.012
+#define SUPERRES_DOWNUPMSE_BY_Q2_THRESH_KEYFRAME 0.008
+#define SUPERRES_DOWNUPMSE_BY_Q2_THRESH_ARFFRAME 0.008
+#define SUPERRES_DOWNUPMSE_BY_AC_THRESH 0.2
+
+double get_downupmse_by_q2_thresh(const GF_GROUP *gf_group,
+                                  const RATE_CONTROL *rc) {
+  // TODO(now): Return keyframe thresh * factor based on frame type / pyramid
+  // level.
+  if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
+      gf_group->update_type[gf_group->index] == KFFLT_UPDATE) {
+    return SUPERRES_DOWNUPMSE_BY_Q2_THRESH_ARFFRAME;
+  } else if (gf_group->update_type[gf_group->index] == KF_UPDATE) {
+    if (rc->frames_to_key <= 1)
+      return SUPERRES_DOWNUPMSE_BY_Q2_THRESH_KEYFRAME_SOLO;
+    else
+      return SUPERRES_DOWNUPMSE_BY_Q2_THRESH_KEYFRAME;
+  } else {
+    assert(0);
+  }
+  return 0;
+}
+
+static uint8_t get_superres_denom_from_downupmse(int qindex, double *downupmse,
+                                                 double threshq,
+                                                 double threshp) {
+  const double q = av1_convert_qindex_to_q(qindex, AOM_BITS_8);
+  const double tq = threshq * q * q;
+  const double tp = threshp * downupmse[SUPERRES_SCALES];
+  const double thresh = AOMMIN(tq, tp);
+  int k;
+  for (k = 0; k < SUPERRES_SCALES; ++k) {
+    if (downupmse[k] > thresh) break;
+  }
+  return SCALE_NUMERATOR + 2 * k;
+}
+#else
 // Compute the horizontal frequency components' energy in a frame
 // by calculuating the 16x4 Horizontal DCT. This is to be used to
 // decide the superresolution parameters.
@@ -49,7 +102,7 @@
     for (int k = 1; k < 16; ++k) energy[k] = 1e+20;
   }
 }
-
+/*
 static uint8_t calculate_next_resize_scale(const AV1_COMP *cpi) {
   // Choose an arbitrary random number
   static unsigned int seed = 56789;
@@ -79,7 +132,7 @@
          cpi->sf.hl_sf.superres_auto_search_type != SUPERRES_AUTO_SOLO &&
          cpi->rc.frames_to_key > 1;
 }
-
+*/
 #define SUPERRES_ENERGY_BY_Q2_THRESH_KEYFRAME_SOLO 0.048
 #define SUPERRES_ENERGY_BY_Q2_THRESH_KEYFRAME 0.032
 #define SUPERRES_ENERGY_BY_Q2_THRESH_ARFFRAME 0.032
@@ -116,6 +169,55 @@
   }
   return 3 * SCALE_NUMERATOR - k;
 }
+#endif  // CONFIG_2D_SR
+
+static uint8_t calculate_next_resize_scale(const AV1_COMP *cpi) {
+  // Choose an arbitrary random number
+  static unsigned int seed = 56789;
+  const ResizeCfg *resize_cfg = &cpi->oxcf.resize_cfg;
+  if (is_stat_generation_stage(cpi)) return SCALE_NUMERATOR;
+  uint8_t new_denom = SCALE_NUMERATOR;
+
+  if (cpi->common.seq_params.reduced_still_picture_hdr) return SCALE_NUMERATOR;
+  switch (resize_cfg->resize_mode) {
+    case RESIZE_NONE: new_denom = SCALE_NUMERATOR; break;
+    case RESIZE_FIXED:
+      if (cpi->common.current_frame.frame_type == KEY_FRAME)
+        new_denom = resize_cfg->resize_kf_scale_denominator;
+      else
+        new_denom = resize_cfg->resize_scale_denominator;
+      break;
+    case RESIZE_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break;
+    default: assert(0);
+  }
+  return new_denom;
+}
+
+#if CONFIG_2D_SR
+static bool superres_in_recode_allowed_qp(const AV1_COMP *const cpi) {
+  const int qpoff = (cpi->td.mb.e_mbd.bd - 8) * 24;
+  const int qp = cpi->oxcf.rc_cfg.qp;
+  const int q_thresh_kf = 160 + qpoff;
+  const int q_thresh_non_kf = 160 + qpoff;
+
+  return (frame_is_intra_only(&cpi->common) && qp > q_thresh_kf) ||
+         qp > q_thresh_non_kf;
+}
+#endif  // CONFIG_2D_SR
+
+int av1_superres_in_recode_allowed(const AV1_COMP *const cpi) {
+  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
+  // Empirically found to not be beneficial for image coding.
+  return oxcf->superres_cfg.superres_mode == AOM_SUPERRES_AUTO &&
+#if CONFIG_2D_SR
+#if !CONFIG_2D_SR_FRAME_WISE_SWITCHING
+         superres_in_recode_allowed_qp(cpi) &&
+#endif  // CONFIG_2D_SR
+#else   // CONFIG_2D_SR
+         cpi->rc.frames_to_key > 1 &&
+#endif  // CONFIG_2D_SR
+         cpi->sf.hl_sf.superres_auto_search_type != SUPERRES_AUTO_SOLO;
+}
 
 static uint8_t get_superres_denom_for_qindex(const AV1_COMP *cpi, int qindex,
                                              int sr_kf, int sr_arf) {
@@ -135,6 +237,29 @@
     return SCALE_NUMERATOR;
   }
 
+#if CONFIG_2D_SR
+  int denom = SCALE_NUMERATOR;
+  (void)qindex;
+  double downupmse[SUPERRES_SCALES + 1];
+  analyze_downup_mse(cpi, downupmse);
+  const double downupmse_by_q2_thresh =
+      get_downupmse_by_q2_thresh(gf_group, &cpi->rc);
+  denom = get_superres_denom_from_downupmse(qindex, downupmse,
+                                            downupmse_by_q2_thresh,
+                                            SUPERRES_DOWNUPMSE_BY_AC_THRESH);
+  /*
+  const double q = av1_convert_qindex_to_q(qindex, cpi->td.mb.e_mbd.bd);
+  const double iq2 = 1.0 / (q * q);
+
+  printf("\nDownup mse = [");
+  for (int k = 0; k <= SUPERRES_SCALES; ++k) printf("%f, ", downupmse[k]);
+  printf("]\n");
+  printf("\nDownup mse/q^2 = [");
+  for (int k = 0; k <= SUPERRES_SCALES; ++k)
+    printf("%f, ", iq2 * downupmse[k]);
+  printf("]\n");
+  */
+#else
   double energy[16];
   analyze_hor_freq(cpi, energy);
 
@@ -158,18 +283,33 @@
     // to be tried anyway.
     denom = AOMMAX(denom, SCALE_NUMERATOR + 1);
   }
+#endif  // CONFIG_2D_SR
   return denom;
 }
 
+#if CONFIG_2D_SR
+// TODO(yuec): redesign the algorithm to return a valid option that is in the
+// new lookup table.
+static ScaleFactor calculate_next_superres_scale(AV1_COMP *cpi) {
+#else   // CONFIG_2D_SR
 static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) {
+#endif  // CONFIG_2D_SR
   // Choose an arbitrary random number
   static unsigned int seed = 34567;
   const AV1EncoderConfig *oxcf = &cpi->oxcf;
   const SuperResCfg *const superres_cfg = &oxcf->superres_cfg;
   const FrameDimensionCfg *const frm_dim_cfg = &oxcf->frm_dim_cfg;
   const RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
+#if CONFIG_2D_SR
+  ScaleFactor factor = { SCALE_NUMERATOR, SCALE_NUMERATOR };
+#endif  // CONFIG_2D_SR
 
-  if (is_stat_generation_stage(cpi)) return SCALE_NUMERATOR;
+  if (is_stat_generation_stage(cpi))
+#if CONFIG_2D_SR
+    return factor;
+#else   // CONFIG_2D_SR
+    return SCALE_NUMERATOR;
+#endif  // CONFIG_2D_SR
   uint8_t new_denom = SCALE_NUMERATOR;
 
   // Make sure that superres mode of the frame is consistent with the
@@ -193,7 +333,13 @@
       else
         new_denom = superres_cfg->superres_scale_denominator;
       break;
-    case AOM_SUPERRES_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break;
+    case AOM_SUPERRES_RANDOM:
+#if CONFIG_2D_SR
+      new_denom = 2 * (lcg_rand16(&seed) % 5) + 8;
+#else
+      new_denom = lcg_rand16(&seed) % 9 + 8;
+#endif  // CONFIG_2D_SR
+      break;
     case AOM_SUPERRES_QTHRESH: {
       // Do not use superres when screen content tools are used.
       if (cpi->common.features.allow_screen_content_tools) break;
@@ -230,9 +376,14 @@
       const SUPERRES_AUTO_SEARCH_TYPE sr_search_type =
           cpi->sf.hl_sf.superres_auto_search_type;
       const int qthresh = (sr_search_type == SUPERRES_AUTO_SOLO) ? 128 : 0;
+#if CONFIG_2D_SR_AUTO_DISABLE_SPEEDUP
+      // TODO: compute q based on coded resolution
+      {
+#else
       if (q <= qthresh) {
         new_denom = SCALE_NUMERATOR;  // Don't use superres.
       } else {
+#endif
         if (sr_search_type == SUPERRES_AUTO_ALL) {
           if (cpi->common.current_frame.frame_type == KEY_FRAME)
             new_denom = superres_cfg->superres_kf_scale_denominator;
@@ -246,11 +397,37 @@
     }
     default: assert(0);
   }
+#if CONFIG_2D_SR
+  factor.scale_denom = new_denom;
+  return factor;
+#else   // CONFIG_2D_SR
   return new_denom;
+#endif  // CONFIG_2D_SR
 }
 
+#if CONFIG_2D_SR
+static int dimension_is_ok(int orig_dim, int resized_dim, int denom, int nom) {
+#if CONFIG_2D_SR_SCALE_EXT
+  return (resized_dim * nom >= orig_dim * denom / 6);
+#else  // CONFIG_2D_SR_SCALE_EXT
+  return (resized_dim * nom >= orig_dim * denom / 2);
+#endif  // CONFIG_2D_SR_SCALE_EXT
+}
+
+static int dimensions_are_ok(int owidth, int oheight, size_params_type *rsz) {
+  const uint8_t denom = rsz->superres_denom;
+  const uint8_t nom = rsz->superres_num;
+
+  return dimension_is_ok(owidth, rsz->resize_width, denom, nom) &&
+         dimension_is_ok(oheight, rsz->resize_height, denom, nom);
+}
+#else   // CONFIG_2D_SR
 static int dimension_is_ok(int orig_dim, int resized_dim, int denom) {
-  return (resized_dim * SCALE_NUMERATOR >= orig_dim * denom / 2);
+#if CONFIG_2D_SR_SCALE_EXT
+  return (resized_dim * SCALE_NUMERATOR >= orig_dim * denom / 6);
+#else  // CONFIG_2D_SR_SCALE_EXT
+  return (resized_dim * SCALE_NUMERATOR >= orig_dim * denom / 2);
+#endif  // CONFIG_2D_SR_SCALE_EXT
 }
 
 static int dimensions_are_ok(int owidth, int oheight, size_params_type *rsz) {
@@ -258,6 +435,7 @@
   (void)oheight;
   return dimension_is_ok(owidth, rsz->resize_width, rsz->superres_denom);
 }
+#endif  // CONFIG_2D_SR
 
 static int validate_size_scales(RESIZE_MODE resize_mode,
                                 aom_superres_mode superres_mode, int owidth,
@@ -300,7 +478,12 @@
              superres_mode == AOM_SUPERRES_RANDOM) {
     // Alter both resize and superres scales as needed to enforce conformity.
     do {
+#if CONFIG_2D_SR
+      if (resize_denom * rsz->superres_num >
+          rsz->superres_denom * SCALE_NUMERATOR)
+#else   // CONFIG_2D_SR
       if (resize_denom > rsz->superres_denom)
+#endif  // CONFIG_2D_SR
         --resize_denom;
       else
         --rsz->superres_denom;
@@ -310,7 +493,11 @@
                                 resize_denom);
     } while (!dimensions_are_ok(owidth, oheight, rsz) &&
              (resize_denom > SCALE_NUMERATOR ||
+#if CONFIG_2D_SR
+              rsz->superres_denom > rsz->superres_num));
+#else       // CONFIG_2D_SR
               rsz->superres_denom > SCALE_NUMERATOR));
+#endif      // CONFIG_2D_SR
   } else {  // We are allowed to alter neither resize scale nor superres
             // scale.
     return 0;
@@ -323,8 +510,14 @@
   const AV1EncoderConfig *oxcf = &cpi->oxcf;
   ResizePendingParams *resize_pending_params = &cpi->resize_pending_params;
   const FrameDimensionCfg *const frm_dim_cfg = &oxcf->frm_dim_cfg;
+#if CONFIG_2D_SR
+  size_params_type rsz = { frm_dim_cfg->width, frm_dim_cfg->height,
+                           SCALE_NUMERATOR, SCALE_NUMERATOR };
+  ScaleFactor factor;
+#else   // CONFIG_2D_SR
   size_params_type rsz = { frm_dim_cfg->width, frm_dim_cfg->height,
                            SCALE_NUMERATOR };
+#endif  // CONFIG_2D_SR
   int resize_denom = SCALE_NUMERATOR;
   if (is_stat_generation_stage(cpi)) return rsz;
   if (resize_pending_params->width && resize_pending_params->height) {
@@ -339,7 +532,13 @@
     av1_calculate_scaled_size(&rsz.resize_width, &rsz.resize_height,
                               resize_denom);
   }
+#if CONFIG_2D_SR
+  factor = calculate_next_superres_scale(cpi);
+  rsz.superres_denom = factor.scale_denom;
+  rsz.superres_num = factor.scale_num;
+#else   // CONFIG_2D_SR
   rsz.superres_denom = calculate_next_superres_scale(cpi);
+#endif  // CONFIG_2D_SR
   if (!validate_size_scales(oxcf->resize_cfg.resize_mode, cpi->superres_mode,
                             frm_dim_cfg->width, frm_dim_cfg->height, &rsz))
     assert(0 && "Invalid scale parameters");
@@ -355,16 +554,53 @@
   cm->superres_upscaled_width = encode_width;
   cm->superres_upscaled_height = encode_height;
   cm->superres_scale_denominator = rsz->superres_denom;
+#if CONFIG_2D_SR
+  cm->superres_scale_numerator = rsz->superres_num;
+  av1_calculate_scaled_superres_size(&encode_width, &encode_height,
+                                     rsz->superres_denom, rsz->superres_num);
+#else
   av1_calculate_scaled_superres_size(&encode_width, &encode_height,
                                      rsz->superres_denom);
+#endif
   av1_set_frame_size(cpi, encode_width, encode_height);
 }
 
+#if CONFIG_2D_SR
+static uint8_t get_superres_scale_index(const size_params_type *rsz) {
+  const int denom = rsz->superres_denom;
+  const int num = rsz->superres_num;
+
+  for (int i = 0; i < SUPERRES_SCALES; i++) {
+    if (denom == superres_scales[i].scale_denom &&
+        num == superres_scales[i].scale_num)
+      return i;
+  }
+  return SUPERRES_SCALES;
+}
+#endif  // CONFIG_2D_SR
+
 void av1_setup_frame_size(AV1_COMP *cpi) {
   AV1_COMMON *cm = &cpi->common;
   // Reset superres params from previous frame.
   cm->superres_scale_denominator = SCALE_NUMERATOR;
+#if CONFIG_2D_SR
+  cm->superres_scale_numerator = SCALE_NUMERATOR;
+#endif  // CONFIG_2D_SR
   const size_params_type rsz = calculate_next_size_params(cpi);
+#if CONFIG_2D_SR
+  cm->superres_scale_index = get_superres_scale_index(&rsz);
+  if (cm->superres_scale_index < SUPERRES_SCALES) {
+    cm->superres_scale_denominator =
+        superres_scales[cm->superres_scale_index].scale_denom;
+    cm->superres_scale_numerator =
+        superres_scales[cm->superres_scale_index].scale_num;
+  } else {
+    assert(cm->superres_scale_denominator == SCALE_NUMERATOR &&
+           cm->superres_scale_numerator == SCALE_NUMERATOR &&
+           "The encoder-decided superres scale is not supported.");
+  }
+#endif  // CONFIG_2D_SR
+
   setup_frame_size_from_params(cpi, &rsz);
 
   assert(av1_is_min_tile_width_satisfied(cm));
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index 0e7fdce..192f567 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -121,6 +121,7 @@
       min_frame_size >= 720
           ? MV_COST_L1_HDRES
           : (min_frame_size >= 480 ? MV_COST_L1_MIDRES : MV_COST_L1_LOWRES);
+
 #if CONFIG_FLEX_MVRES
   assert(cm->features.fr_mv_precision == MV_PRECISION_ONE_EIGHTH_PEL);
   const MvSubpelPrecision pb_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL;
@@ -158,7 +159,11 @@
 #endif
 #endif
                                      search_site_cfg,
-                                     /*fine_search_interval=*/0);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                     /*fine_search_interval=*/0, 0);
+#else
+                                     /*fine_search_interval=*/0);
+#endif
   av1_set_mv_search_method(&full_ms_params, search_site_cfg, search_method);
   full_ms_params.run_mesh_search = 1;
   full_ms_params.mv_cost_params.mv_cost_type = mv_cost_type;
@@ -184,7 +189,11 @@
 #if CONFIG_FLEX_MVRES
                                       pb_mv_precision,
 #endif
-                                      cost_list);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                      cost_list, 0);
+#else
+                                      cost_list);
+#endif
     ms_params.forced_stop = EIGHTH_PEL;
     ms_params.var_params.subpel_search_type = subpel_search_type;
     // Since we are merely refining the result from full pixel search, we don't
@@ -231,7 +240,11 @@
 #endif
 #endif
                                            search_site_cfg,
-                                           /*fine_search_interval=*/0);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                           /*fine_search_interval=*/0, 0);
+#else
+                                           /*fine_search_interval=*/0);
+#endif
         av1_set_mv_search_method(&full_ms_params, search_site_cfg,
                                  search_method);
         full_ms_params.run_mesh_search = 1;
@@ -246,7 +259,11 @@
 #if CONFIG_FLEX_MVRES
                                           pb_mv_precision,
 #endif
-                                          cost_list);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                          cost_list, 0);
+#else
+                                          cost_list);
+#endif
         ms_params.forced_stop = EIGHTH_PEL;
         ms_params.var_params.subpel_search_type = subpel_search_type;
         // Since we are merely refining the result from full pixel search, we
@@ -353,12 +370,22 @@
  *
  * Nothing returned, But the contents of `pred` will be modified.
  */
+#if CONFIG_2D_SR_MC_PHASE_FIX
+static void tf_build_predictor(const YV12_BUFFER_CONFIG *ref_frame,
+                               const MACROBLOCKD *mbd,
+                               const BLOCK_SIZE block_size, const int mb_row,
+                               const int mb_col, const int num_planes,
+                               const struct scale_factors *scale,
+                               const MV *subblock_mvs, uint16_t *pred,
+                               const struct AV1Common *const cm) {
+#else
 static void tf_build_predictor(const YV12_BUFFER_CONFIG *ref_frame,
                                const MACROBLOCKD *mbd,
                                const BLOCK_SIZE block_size, const int mb_row,
                                const int mb_col, const int num_planes,
                                const struct scale_factors *scale,
                                const MV *subblock_mvs, uint16_t *pred) {
+#endif
   // Information of the entire block.
   const int mb_height = block_size_high[block_size];  // Height.
   const int mb_width = block_size_wide[block_size];   // Width.
@@ -407,6 +434,9 @@
         av1_init_inter_params(&inter_pred_params, w, h, y, x, subsampling_x,
                               subsampling_y, bit_depth, is_intrabc, scale,
                               &ref_buf, interp_filters);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+        av1_init_phase_offset(&inter_pred_params, cm);
+#endif
         inter_pred_params.conv_params = get_conv_params(0, plane, bit_depth);
         av1_enc_build_one_inter_predictor(&pred[plane_offset + i * plane_w + j],
                                           plane_w, &mv, &inter_pred_params);
@@ -800,6 +830,7 @@
   // Setup.
   mbd->block_ref_scale_factors[0] = scale;
   mbd->block_ref_scale_factors[1] = scale;
+
   // A temporary block info used to store state in temporal filtering process.
   MB_MODE_INFO *tmp_mb_mode_info = (MB_MODE_INFO *)malloc(sizeof(MB_MODE_INFO));
   memset(tmp_mb_mode_info, 0, sizeof(MB_MODE_INFO));
@@ -825,6 +856,7 @@
       memset(accum, 0, num_planes * mb_pels * sizeof(accum[0]));
       memset(count, 0, num_planes * mb_pels * sizeof(count[0]));
       MV ref_mv = kZeroMv;  // Reference motion vector passed down along frames.
+
       // Perform temporal filtering frame by frame.
       for (int frame = 0; frame < num_frames; frame++) {
         if (frames[frame] == NULL) continue;
@@ -841,8 +873,15 @@
                            mb_row, mb_col, &ref_mv, subblock_mvs,
                            subblock_mses);
         }
+
+#if CONFIG_2D_SR_MC_PHASE_FIX
+        const AV1_COMMON *const cm = &cpi->common;
+        tf_build_predictor(frames[frame], mbd, block_size, mb_row, mb_col,
+                           num_planes, scale, subblock_mvs, pred, cm);
+#else
         tf_build_predictor(frames[frame], mbd, block_size, mb_row, mb_col,
                            num_planes, scale, subblock_mvs, pred);
+#endif
 
         // Perform weighted averaging.
         if (frame == filter_frame_idx) {  // Frame to be filtered.
@@ -1086,6 +1125,7 @@
   const GF_GROUP *const gf_group = &cpi->gf_group;
   const uint8_t group_idx = gf_group->index;
   const FRAME_UPDATE_TYPE update_type = gf_group->update_type[group_idx];
+
   // Filter one more ARF if the lookahead index is leq 7 (w.r.t. 9-th frame).
   // This frame is ALWAYS a show existing frame.
   const int is_second_arf =
@@ -1103,6 +1143,7 @@
   int num_frames_for_filtering = 0;
   int filter_frame_idx = -1;
   double noise_levels[MAX_MB_PLANE] = { 0 };
+
   tf_setup_filtering_buffer(cpi, filter_frame_lookahead_idx, is_second_arf,
                             frames, &num_frames_for_filtering,
                             &filter_frame_idx, noise_levels);
@@ -1183,7 +1224,6 @@
       }
     }
   }
-
   return 1;
 }
 /*!\endcond */
diff --git a/av1/encoder/temporal_filter.h b/av1/encoder/temporal_filter.h
index ffc6b52..8bca669 100644
--- a/av1/encoder/temporal_filter.h
+++ b/av1/encoder/temporal_filter.h
@@ -105,6 +105,7 @@
  *
  * \return Whether temporal filtering is successfully done.
  */
+
 int av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
                         int *show_existing_arf);
 
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index a09040c..5ef59ea 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -170,7 +170,11 @@
 #endif
 #endif
                                      search_site_cfg,
-                                     /*fine_search_interval=*/0);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                     /*fine_search_interval=*/0, 0);
+#else
+                                     /*fine_search_interval=*/0);
+#endif
   SEARCH_METHODS search_method = tpl_sf->search_method;
 #if CONFIG_FLEX_MVRES
   // MV search of flex MV precision is supported only for NSTEP or DIAMOND
@@ -191,7 +195,11 @@
 #if CONFIG_FLEX_MVRES
                                     pb_mv_precision,
 #endif
-                                    cost_list);
+#if CONFIG_2D_SR_SECOND_PRED_FIX
+                                    cost_list, 0);
+#else
+                                    cost_list);
+#endif
   ms_params.forced_stop = tpl_sf->subpel_force_stop;
   ms_params.var_params.subpel_search_type = USE_2_TAPS;
   ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE;
@@ -234,6 +242,7 @@
   return 0;
 }
 
+
 static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
                                        int mi_col, BLOCK_SIZE bsize,
                                        TX_SIZE tx_size,
@@ -437,6 +446,9 @@
     av1_init_inter_params(&inter_pred_params, bw, bh, mi_row * MI_SIZE,
                           mi_col * MI_SIZE, 0, 0, xd->bd, 0, &tpl_data->sf,
                           &ref_buf, kernel);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
     inter_pred_params.conv_params = get_conv_params(0, 0, xd->bd);
 
     av1_enc_build_one_inter_predictor(predictor, bw, &best_rfidx_mv.as_mv,
@@ -488,6 +500,9 @@
     av1_init_inter_params(&inter_pred_params, bw, bh, mi_row * MI_SIZE,
                           mi_col * MI_SIZE, 0, 0, xd->bd, 0, &tpl_data->sf,
                           &ref_buf, kernel);
+#if CONFIG_2D_SR_MC_PHASE_FIX
+    av1_init_phase_offset(&inter_pred_params, cm);
+#endif
     inter_pred_params.conv_params = get_conv_params(0, 0, xd->bd);
 
     av1_enc_build_one_inter_predictor(dst_buffer, dst_buffer_stride,
@@ -889,6 +904,7 @@
   const BLOCK_SIZE bsize = convert_length_to_bsize(cpi->tpl_data.tpl_bsize_1d);
   const TX_SIZE tx_size = max_txsize_lookup[bsize];
   const int mi_height = mi_size_high[bsize];
+
   for (int mi_row = 0; mi_row < mi_params->mi_rows; mi_row += mi_height) {
     // Motion estimation row boundary
     av1_set_mv_row_limits(mi_params, &x->mv_limits, mi_row, mi_height,
@@ -991,6 +1007,7 @@
       if (buf == NULL) break;
       tpl_frame->gf_picture = &buf->img;
     }
+
     // 'cm->current_frame.frame_number' is the display number
     // of the current frame.
     // 'anc_frame_offset' is the number of frames displayed so
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index 9bfa060..32479ac 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -2748,6 +2748,7 @@
         if (txfm_param.sec_tx_type > 0) continue;
       }
 #endif  // CONFIG_ATC_DCTX_ALIGNED
+
       // If rd cost based on coeff rate alone is already more than best_rd,
       // terminate early.
       if (RDCOST(x->rdmult, rate_cost, 0) > best_rd) continue;
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 529ba9f..c5131c8 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -201,6 +201,92 @@
   CONFIG_FLEXIBLE_RU_SIZE 1
   "AV2 experiment flag to choose RU size between 128x128, 256x256 and 512x512")
 
+set_aom_config_var(CONFIG_2D_SR 1 NUMBER
+                   "AV2 2D super resolution experiment flag.")
+set_aom_config_var(CONFIG_2D_SR_SCALE_EXT 1 NUMBER
+                   "AV2 2D super resolution scaling range extension flag.")
+set_aom_config_var(CONFIG_2D_SR_FRAME_WISE_SWITCHING 1 NUMBER
+                   "QP offset adjustment for framewise scaling factor select")
+set_aom_config_var(CONFIG_2D_SR_FRAME_WISE_SWITCHING_SSE_MATCHING 1 NUMBER
+                   "Fix on SSE value matching")
+set_aom_config_var(CONFIG_2D_SR_RESAMPLER_FIX 1 NUMBER
+                   "Co-sited Chroma position for Lanczos resampler")
+set_aom_config_var(CONFIG_2D_SR_RESTORATION_FIX 1 NUMBER
+                   "Fix on restoration with SR")
+set_aom_config_var(CONFIG_2D_SR_RESTORATION_BUG_FIX 1 NUMBER
+                   "BUG Fix on restoration with SR")
+set_aom_config_var(CONFIG_2D_SR_RESTORATION_BUG_FIX_ON_STRIDE 1 NUMBER
+                   "BUG Fix on stride of restoration with SR")
+set_aom_config_var(CONFIG_2D_SR_RESTORATION_TILE_BASED_WRITE_SB 1 NUMBER
+                   "BUG Fix on mismatch of restoration with SR")
+set_aom_config_var(CONFIG_2D_SR_RESTORATION_FLEXIBLE_RU_SIZE_SCALE 1 NUMBER
+                   "Flexible RU Size adaptation for SR")
+set_aom_config_var(CONFIG_2D_SR_RESTORATION_BIT_ESTIMATE_SCALE 1 NUMBER
+                   "Scaling Bit Estimation for SR")
+set_aom_config_var(CONFIG_2D_SR_SET_TX_SKIP_ZERO 1 NUMBER
+                   "Improvement of pc wiener with SR")
+set_aom_config_var(CONFIG_2D_SR_PC_WIENER_ENABLE_FOR_SR 1 NUMBER
+                   "Enable PC Wiener when SR is used")
+set_aom_config_var(CONFIG_2D_SR_SAVE_BOUNDARY_AFTER_SR 1 NUMBER
+                   "Save boundaries for restoration after SR")
+set_aom_config_var(CONFIG_2D_SR_FIX_COMPOUND_ME 1 NUMBER
+                   "Fix on Motion Estimation for compound prediction with SR")
+set_aom_config_var(CONFIG_2D_SR_SECOND_PRED_FIX 1 NUMBER
+                   "Fix on second prediction for compound mode")
+set_aom_config_var(CONFIG_2D_SR_REF_IDX_FIX 1 NUMBER
+                   "Fix on ref idx")
+set_aom_config_var(CONFIG_2D_SR_RESTORE_SCALED_BUF 1 NUMBER
+                   "Restore scaled buffers")
+set_aom_config_var(CONFIG_2D_SR_RESTORE_UNSCALED_BUF 1 NUMBER
+                   "Restore unscaled buffers")
+set_aom_config_var(CONFIG_2D_SR_TILE_CONFIG 1 NUMBER
+                   "Tile off when SR is used")
+set_aom_config_var(CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES_FIX 1 NUMBER
+                   "Fix on Allow TIP direct with SR")
+set_aom_config_var(CONFIG_2D_SR_BAWP_FIX 1 NUMBER
+                   "Fix on BAWP for SR")
+set_aom_config_var(CONFIG_2D_SR_REF_MVS_INTER_FIX 1 NUMBER
+                   "Fix on ref mvs inter for SR")
+set_aom_config_var(CONFIG_2D_SR_ZERO_MV_EXACT_MATCH 1 NUMBER
+                   "Force zero MV exact match for IL and OOL")
+set_aom_config_var(CONFIG_2D_SR_MC_PHASE_FIX 1 NUMBER
+                   "Fix MC phase with SR")
+set_aom_config_var(CONFIG_2D_SR_SUBSAMPLE_FOR_WARP 1 NUMBER
+                   "Enable subsampling for warp modes with SR.")
+set_aom_config_var(CONFIG_2D_SR_1_5X_SUBSAMPLE_FOR_WARP 1 NUMBER
+                    "Enable scaled reference support for 1.5x warp modes with SR.")
+set_aom_config_var(CONFIG_2D_SR_ZERO_PHASE 1 NUMBER
+                   "Use zero phase with downsampler, upsampler and MC in SR")
+set_aom_config_var(CONFIG_2D_SR_PHASE_ADJUSTMENT 1 NUMBER
+                   "Match OOL phase in IL 2x, 3x, 4x, 6x")
+set_aom_config_var(CONFIG_2D_SR_AUTO_SCALED_REF_SUPPORT 1 NUMBER
+                   "Enable scaled ref support in superres AUTO mode")
+set_aom_config_var(CONFIG_2D_SR_AUTO_DISABLE_SPEEDUP 1 NUMBER
+                   "Disable q threshold based speedup for AUTO mode")
+set_aom_config_var(CONFIG_2D_SR_LIMIT_SCALE_FACTORS 1 NUMBER
+                   "Limit scale factors")
+set_aom_config_var(CONFIG_2D_SR_CLAMP_MV_FOR_TIP 1 NUMBER
+                   "Clamp MV instead of Sample position for 2x, 3x, 4x, 6x")
+set_aom_config_var(CONFIG_2D_SR_LANCZOS_RESAMPLER_Y4M_CONVERT_8BIT_TO_10BIT 1 NUMBER
+                   "Convert 8-bit content to 10-bit before resampling")
+set_aom_config_var(CONFIG_2D_SR_LANCZOS_RESAMPLER_Y4M_ALIGN_8 1 NUMBER
+                   "Use picture sizes derived in in-loop processing")
+set_aom_config_var(CONFIG_2D_SR_AUTO_DISABLE_SCREEN_CONTENT_TOOLS_FOR_NON_1x 1 NUMBER "Disable screen content for non 1x in AUTO MODE")
+set_aom_config_var(CONFIG_REFINEMV 1 NUMBER "Enable refinemv modes")
+set_aom_config_var(CONFIG_2D_SR_STRIDED_CONV_SPEED 1 NUMBER
+                    "Speedup strided convolution")
+set_aom_config_var(CONFIG_2D_SR_ADJUST_DELTA_QP 1 NUMBER
+                    "Adjust delta QP for SR")
+set_aom_config_var(CONFIG_2D_SR_USE_GOOD_INDICES 1 NUMBER
+                    "Use good_indices for SR")
+
+set_aom_config_var(
+  CONFIG_ACROSS_SCALE_TPL_MVS 1 NUMBER
+  "AV2 experiment flag to enable across scale temporal mv projection.")
+set_aom_config_var(
+  CONFIG_ALLOW_TIP_DIRECT_WITH_SUPERRES 1 NUMBER
+  "AV2 experiment flag to enable TIP direct mode across scale.")
+
 # Source of throughput analysis : CWG-B065
 set_aom_config_var(CONFIG_THROUGHPUT_ANALYSIS 0
                    "AV2 experiment flag to measure throughput.")
@@ -219,7 +305,7 @@
                    "Enable joint MVD coding with multiple scaling factors")
 set_aom_config_var(
   CONFIG_PALETTE_IMPROVEMENTS
-  1
+  0
   "AV2 experiment flag for palette parsing independency and improved palette color map coding."
 )
 set_aom_config_var(CONFIG_SKIP_MODE_SSE_BUG_FIX 1
@@ -249,7 +335,6 @@
 set_aom_config_var(CONFIG_SKIP_TXFM_OPT 1
                    "Enable to optimize the signaling of skip_txfm")
 set_aom_config_var(CONFIG_CWP 1 "Enables compound weighted prediction.")
-set_aom_config_var(CONFIG_REFINEMV 1 "Enable refinemv modes")
 
 set_aom_config_var(CONFIG_EXPLICIT_TEMPORAL_DIST_CALC 1
                    "Enable to explicit temporal distance calculation")
diff --git a/test/comp_avg_pred_test.h b/test/comp_avg_pred_test.h
index 3f1b3c5..a0700b9 100644
--- a/test/comp_avg_pred_test.h
+++ b/test/comp_avg_pred_test.h
@@ -234,7 +234,11 @@
                   NULL, NULL, 0, 0, NULL, output,
                   pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
                   sub_y_q3, ref8 + offset_r * w + offset_c, in_w, bd,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+                  &dist_wtd_comp_params, subpel_search, 0);
+#else
                   &dist_wtd_comp_params, subpel_search);
+#endif
               test_impl(NULL, NULL, 0, 0, NULL, output2,
                         pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
                         sub_y_q3, ref8 + offset_r * w + offset_c, in_w, bd,
@@ -288,7 +292,11 @@
     for (int i = 0; i < num_loops; ++i)
       aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
           NULL, NULL, 0, 0, NULL, output, pred8, in_w, in_h, sub_x_q3, sub_y_q3,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+          ref8, in_w, bd, &dist_wtd_comp_params, subpel_search, 0);
+#else
           ref8, in_w, bd, &dist_wtd_comp_params, subpel_search);
+#endif
 
     aom_usec_timer_mark(&timer);
     const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
diff --git a/test/comp_mask_variance_test.cc b/test/comp_mask_variance_test.cc
index db9b995..4fcfdbe 100644
--- a/test/comp_mask_variance_test.cc
+++ b/test/comp_mask_variance_test.cc
@@ -292,7 +292,11 @@
     for (int j = 0; j < num_loops; ++j) {
       aom_highbd_comp_mask_upsampled_pred(
           NULL, NULL, 0, 0, NULL, comp_pred1_, pred_, w, h, subx, suby, ref_,
+#if CONFIG_2D_SR_FIX_COMPOUND_ME
+          MAX_SB_SIZE, mask, w, 0, bd_, subpel_search, 0);
+#else
           MAX_SB_SIZE, mask, w, 0, bd_, subpel_search);
+#endif
     }
     aom_usec_timer_mark(&timer);
     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
diff --git a/test/warp_filter_test_util.cc b/test/warp_filter_test_util.cc
index e711bb1..b0e4ae4 100644
--- a/test/warp_filter_test_util.cc
+++ b/test/warp_filter_test_util.cc
@@ -243,7 +243,11 @@
 
               av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
                                        out_w, out_h, out_w, sub_x, sub_y, bd,
+#if CONFIG_2D_SR_SUBSAMPLE_FOR_WARP
+                                       &conv_params, alpha, beta, gamma, delta, NULL);
+#else
                                        &conv_params, alpha, beta, gamma, delta);
+#endif
               if (use_no_round) {
                 // TODO(angiebird): Change this to test_impl once we have SIMD
                 // implementation