Fix a few encoder bugs in super-res mode. This commit fixes a few encoder bugs in super-res mode. All of the changes are non-normative. This commit does not change the results of CTC configurations. The following bugs are fixed: 1. Fix the various variance computation functions when a scaled reference is enabled. 2. Properly re-allocate superblock-size-dependent buffers. 3. Fix frame-level SSE computations. 4. Properly save and restore the necessary data after and before encoding each resolution. 5. Properly restore the scaled prediction buffers and scaling factors during single and compound motion search. 6. Adjust the QP of the frame based on resolution when super-res is enabled. Before the above fixes, the encoder crashed with super-res mode on. Item (2) of the above list can fix the encoder crash but produces a large BD-rate loss. With all of the fixes above, the gain of super-res mode is neutral. STATS_CHANGED

diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 3f6f211..5674bb4 100644
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl

@@ -645,20 +645,20 @@
   add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
                                                           const MV *const mv, uint16_t *comp_pred8, const uint16_t *pred8, int width,
                                                           int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref8, int ref_stride,
-							  int bd, int subpel_search";
+							  int bd, int subpel_search, int is_scaled_ref";
   specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
 
   add_proto qw/void aom_highbd_dist_wtd_comp_avg_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
                                                               const MV *const mv, uint16_t *comp_pred8, const uint16_t *pred8, int width,
                                                               int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref8,
-                                                              int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search";
+                                                              int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search, int is_scaled_ref";
   specialize qw/aom_highbd_dist_wtd_comp_avg_upsampled_pred sse2/;
 
   add_proto qw/void aom_highbd_comp_mask_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
                                                               const MV *const mv, uint16_t *comp_pred8, const uint16_t *pred8, int width,
                                                               int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref8,
                                                               int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
-                                                              int bd, int subpel_search";
+                                                              int bd, int subpel_search, int is_scaled_ref";
 
 
   #

diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c
index 5ba3f15..78ec068 100644
--- a/aom_dsp/variance.c
+++ b/aom_dsp/variance.c

@@ -580,12 +580,12 @@
     MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
     const MV *const mv, uint16_t *comp_pred, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
-    int ref_stride, int bd, int subpel_search) {
+    int ref_stride, int bd, int subpel_search, int is_scaled_ref) {
   int i, j;
 
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
                             height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                            bd, subpel_search, 0);
+                            bd, subpel_search, is_scaled_ref);
   for (i = 0; i < height; ++i) {
     for (j = 0; j < width; ++j) {
       comp_pred[j] = ROUND_POWER_OF_TWO(pred[j] + comp_pred[j], 1);
@@ -620,13 +620,13 @@
     const MV *const mv, uint16_t *comp_pred, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
     int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
-    int subpel_search) {
+    int subpel_search, int is_scaled_ref) {
   int i, j;
   const int fwd_offset = jcp_param->fwd_offset;
   const int bck_offset = jcp_param->bck_offset;
   aom_highbd_upsampled_pred_c(xd, cm, mi_row, mi_col, mv, comp_pred, width,
                               height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                              bd, subpel_search, 0);
+                              bd, subpel_search, is_scaled_ref);
 
   for (i = 0; i < height; i++) {
     for (j = 0; j < width; j++) {
@@ -663,10 +663,10 @@
     const MV *const mv, uint16_t *comp_pred, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
     int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
-    int bd, int subpel_search) {
+    int bd, int subpel_search, int is_scaled_ref) {
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
                             height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                            bd, subpel_search, 0);
+                            bd, subpel_search, is_scaled_ref);
   aom_highbd_comp_mask_pred(comp_pred, pred, width, height, comp_pred, width,
                             mask, mask_stride, invert_mask);
 }

diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index 27a1e5d..096e994 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c

@@ -875,10 +875,10 @@
     MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
     const MV *const mv, uint16_t *comp_pred16, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
-    int ref_stride, int bd, int subpel_search) {
+    int ref_stride, int bd, int subpel_search, int is_scaled_ref) {
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred16, width,
                             height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                            bd, subpel_search, 0);
+                            bd, subpel_search, is_scaled_ref);
   /*The total number of pixels must be a multiple of 8 (e.g., 4x4).*/
   assert(!(width * height & 7));
   int n = width * height >> 3;
@@ -959,12 +959,12 @@
     const MV *const mv, uint16_t *comp_pred16, const uint16_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref,
     int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
-    int subpel_search) {
+    int subpel_search, int is_scaled_ref) {
   int n;
   int i;
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred16, width,
                             height, subpel_x_q3, subpel_y_q3, ref, ref_stride,
-                            bd, subpel_search, 0);
+                            bd, subpel_search, is_scaled_ref);
   assert(!(width * height & 7));
   n = width * height >> 3;
 

diff --git a/aom_dsp/x86/obmc_variance_sse4.c b/aom_dsp/x86/obmc_variance_sse4.c
index fa5303e..eac2c82 100644
--- a/aom_dsp/x86/obmc_variance_sse4.c
+++ b/aom_dsp/x86/obmc_variance_sse4.c

@@ -141,7 +141,7 @@
   } else if (w < 128 || h < 128) {
     hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w, h);
   } else {
-    assert(w == 128 && h == 128);
+    assert(w >= 128 && h >= 128);
 
     do {
       hbd_obmc_variance_w8n(pre8, pre_stride, wsrc, mask, &sse64, &sum64, w,

diff --git a/av1/common/enums.h b/av1/common/enums.h
index b4c96b6..37efd83 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h

@@ -29,6 +29,7 @@
 
 #undef MAX_SB_SIZE
 #define BAWP_BUGFIX 1
+#define ADJUST_SUPER_RES_Q 1
 
 #if CONFIG_SUBBLK_REF_EXT
 #define SUBBLK_REF_EXT_LINES 2

diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 39b6712..13f96ec 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c

@@ -2113,7 +2113,6 @@
 // Returns 1 if the assigned width or height was <= 0.
 int av1_set_size_literal(AV1_COMP *cpi, int width, int height) {
   AV1_COMMON *cm = &cpi->common;
-  InitialDimensions *const initial_dimensions = &cpi->initial_dimensions;
   av1_check_initial_width(cpi, cm->seq_params.subsampling_x,
                           cm->seq_params.subsampling_y);
 
@@ -2122,20 +2121,31 @@
   cm->width = width;
   cm->height = height;
 
-  if (initial_dimensions->width && initial_dimensions->height &&
-      (cm->width > initial_dimensions->width ||
-       cm->height > initial_dimensions->height)) {
-    av1_free_context_buffers(cm);
-    av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf);
-    av1_free_sms_tree(&cpi->td);
+  const BLOCK_SIZE old_sb_size = cm->sb_size;
+  const BLOCK_SIZE sb_size = av1_select_sb_size(cpi);
+  if (!cpi->seq_params_locked) {
+    set_sb_size(cm, sb_size);
+  } else {
+    av1_set_frame_sb_size(cm, sb_size);
+  }
+  cpi->td.sb_size = cm->sb_size;
+
+  if (cpi->alloc_width && cpi->alloc_height) {
+    if (old_sb_size != cm->sb_size) {
+      // Reallocate sb_size-dependent buffers if the sb_size has changed.
+      reallocate_sb_size_dependent_buffers(cpi);
+    } else if (cm->width > cpi->alloc_width || cm->height > cpi->alloc_height) {
+      av1_free_context_buffers(cm);
+      av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf);
+      av1_free_sms_tree(&cpi->td);
 #if CONFIG_EXT_RECUR_PARTITIONS
-    av1_free_sms_bufs(&cpi->td);
+      av1_free_sms_bufs(&cpi->td);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
-    av1_free_pmc(cpi->td.firstpass_ctx, av1_num_planes(cm));
-    cpi->td.firstpass_ctx = NULL;
-    alloc_compressor_data(cpi);
-    realloc_segmentation_maps(cpi);
-    initial_dimensions->width = initial_dimensions->height = 0;
+      av1_free_pmc(cpi->td.firstpass_ctx, av1_num_planes(cm));
+      cpi->td.firstpass_ctx = NULL;
+      alloc_compressor_data(cpi);
+      realloc_segmentation_maps(cpi);
+    }
   }
   update_frame_size(cpi);
 
@@ -3606,7 +3616,19 @@
 
   // Compute sse and rate.
   if (sse != NULL) {
-    *sse = aom_highbd_get_y_sse(cpi->source, &cm->cur_frame->buf);
+    int64_t tip_as_ref_sse =
+        aom_highbd_get_y_sse(cpi->source, &cm->cur_frame->buf);
+#if CONFIG_TIP_DIRECT_FRAME_MV
+    tip_as_ref_sse += aom_highbd_sse(
+        cpi->source->u_buffer, cpi->source->uv_stride,
+        cm->cur_frame->buf.u_buffer, cm->cur_frame->buf.uv_stride,
+        cpi->source->uv_width, cpi->source->uv_height);
+    tip_as_ref_sse += aom_highbd_sse(
+        cpi->source->v_buffer, cpi->source->uv_stride,
+        cm->cur_frame->buf.v_buffer, cm->cur_frame->buf.uv_stride,
+        cpi->source->uv_width, cpi->source->uv_height);
+#endif  // CONFIG_TIP_DIRECT_FRAME_MV
+    *sse = tip_as_ref_sse;
   }
   if (rate != NULL) {
     const int64_t bits = (*size << 3);
@@ -3628,11 +3650,35 @@
 static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
                                             uint8_t *dest,
                                             int *largest_tile_id) {
-  const AV1_COMMON *const cm = &cpi->common;
+  AV1_COMMON *const cm = &cpi->common;
   assert(cm->seq_params.enable_superres);
   assert(av1_superres_in_recode_allowed(cpi));
   aom_codec_err_t err = AOM_CODEC_OK;
   av1_save_all_coding_context(cpi);
+  FrameProbInfo *const frame_probs = &cpi->frame_probs;
+  const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+  int warped_probs_tmp = frame_probs->warped_probs[update_type];
+
+  aom_superres_mode orig_superres_mode = cpi->superres_mode;
+  cpi->superres_mode = AOM_SUPERRES_NONE;
+  int top_index = 0, bottom_index = 0, full_res_q = 0;
+  full_res_q =
+      av1_rc_pick_q_and_bounds(cpi, &cpi->rc, cm->width, cm->height,
+                               cpi->gf_group.index, &bottom_index, &top_index);
+  const int64_t rdmult = av1_compute_rd_mult_based_on_qindex(cpi, full_res_q);
+
+  cpi->superres_mode = orig_superres_mode;
+  int q_th = 160 + (MAXQ_OFFSET * (cm->seq_params.bit_depth - 8));
+
+  int do_not_search_superres = full_res_q <= q_th;
+  if (do_not_search_superres) {
+    restore_all_coding_context(cpi);
+    cpi->superres_mode = AOM_SUPERRES_NONE;
+    frame_probs->warped_probs[update_type] = warped_probs_tmp;
+    err = encode_with_recode_loop_and_filter(cpi, size, dest, NULL, NULL,
+                                             largest_tile_id);
+    return err;
+  }
 
   int64_t sse1 = INT64_MAX;
   int64_t rate1 = INT64_MAX;
@@ -3641,6 +3687,7 @@
   int64_t rate2 = INT64_MAX;
   int largest_tile_id2;
   double proj_rdcost1 = DBL_MAX;
+  const int search_step = 2;
 
   // Encode with superres.
   if (cpi->sf.hl_sf.superres_auto_search_type == SUPERRES_AUTO_ALL) {
@@ -3652,13 +3699,18 @@
     const GF_GROUP *const gf_group = &cpi->gf_group;
     if (gf_group->update_type[gf_group->index] != OVERLAY_UPDATE &&
         gf_group->update_type[gf_group->index] != INTNL_OVERLAY_UPDATE) {
-      for (int denom = SCALE_NUMERATOR + 1; denom <= 2 * SCALE_NUMERATOR;
-           ++denom) {
+      for (int denom = SCALE_NUMERATOR + 2; denom <= 2 * SCALE_NUMERATOR;
+           denom += search_step) {
         superres_cfg->superres_scale_denominator = denom;
         superres_cfg->superres_kf_scale_denominator = denom;
         const int this_index = denom - (SCALE_NUMERATOR + 1);
 
         cpi->superres_mode = AOM_SUPERRES_AUTO;  // Super-res on for this loop.
+        frame_probs->warped_probs[update_type] = warped_probs_tmp;
+        if (cpi->superres_mode == AOM_SUPERRES_AUTO &&
+            superres_cfg->superres_scale_denominator != SCALE_NUMERATOR) {
+          cpi->common.features.allow_screen_content_tools = 0;
+        }
         err = encode_with_recode_loop_and_filter(
             cpi, size, dest, &superres_sses[this_index],
             &superres_rates[this_index],
@@ -3680,18 +3732,15 @@
     }
     // Encode without superres.
     assert(cpi->superres_mode == AOM_SUPERRES_NONE);
+    frame_probs->warped_probs[update_type] = warped_probs_tmp;
     err = encode_with_recode_loop_and_filter(cpi, size, dest, &sse2, &rate2,
                                              &largest_tile_id2);
     if (err != AOM_CODEC_OK) return err;
 
-    // Note: Both use common rdmult based on base qindex of fullres.
-    const int64_t rdmult =
-        av1_compute_rd_mult_based_on_qindex(cpi, cm->quant_params.base_qindex);
-
     // Find the best rdcost among all superres denoms.
     int best_denom = -1;
-    for (int denom = SCALE_NUMERATOR + 1; denom <= 2 * SCALE_NUMERATOR;
-         ++denom) {
+    for (int denom = SCALE_NUMERATOR + 2; denom <= 2 * SCALE_NUMERATOR;
+         denom += search_step) {
       const int this_index = denom - (SCALE_NUMERATOR + 1);
       const int64_t this_sse = superres_sses[this_index];
       const int64_t this_rate = superres_rates[this_index];
@@ -3721,6 +3770,11 @@
       int64_t rate3 = INT64_MAX;
       cpi->superres_mode =
           AOM_SUPERRES_AUTO;  // Super-res on for this recode loop.
+      frame_probs->warped_probs[update_type] = warped_probs_tmp;
+      if (cpi->superres_mode == AOM_SUPERRES_AUTO &&
+          superres_cfg->superres_scale_denominator != SCALE_NUMERATOR) {
+        cpi->common.features.allow_screen_content_tools = 0;
+      }
       err = encode_with_recode_loop_and_filter(cpi, size, dest, &sse3, &rate3,
                                                largest_tile_id);
       cpi->superres_mode = AOM_SUPERRES_NONE;  // Reset to default (full-res).
@@ -3737,6 +3791,10 @@
     assert(cpi->sf.hl_sf.superres_auto_search_type == SUPERRES_AUTO_DUAL);
     cpi->superres_mode =
         AOM_SUPERRES_AUTO;  // Super-res on for this recode loop.
+    frame_probs->warped_probs[update_type] = warped_probs_tmp;
+    if (cpi->superres_mode == AOM_SUPERRES_AUTO) {
+      cpi->common.features.allow_screen_content_tools = 0;
+    }
     err = encode_with_recode_loop_and_filter(cpi, size, dest, &sse1, &rate1,
                                              &largest_tile_id1);
     cpi->superres_mode = AOM_SUPERRES_NONE;  // Reset to default (full-res).
@@ -3744,13 +3802,12 @@
     restore_all_coding_context(cpi);
     // Encode without superres.
     assert(cpi->superres_mode == AOM_SUPERRES_NONE);
+    frame_probs->warped_probs[update_type] = warped_probs_tmp;
     err = encode_with_recode_loop_and_filter(cpi, size, dest, &sse2, &rate2,
                                              &largest_tile_id2);
+
     if (err != AOM_CODEC_OK) return err;
 
-    // Note: Both use common rdmult based on base qindex of fullres.
-    const int64_t rdmult =
-        av1_compute_rd_mult_based_on_qindex(cpi, cm->quant_params.base_qindex);
     proj_rdcost1 = RDCOST_DBL_WITH_NATIVE_BD_DIST(rdmult, rate1, sse1,
                                                   cm->seq_params.bit_depth);
     const double proj_rdcost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
@@ -3765,6 +3822,10 @@
       int64_t rate3 = INT64_MAX;
       cpi->superres_mode =
           AOM_SUPERRES_AUTO;  // Super-res on for this recode loop.
+      frame_probs->warped_probs[update_type] = warped_probs_tmp;
+      if (cpi->superres_mode == AOM_SUPERRES_AUTO) {
+        cpi->common.features.allow_screen_content_tools = 0;
+      }
       err = encode_with_recode_loop_and_filter(cpi, size, dest, &sse3, &rate3,
                                                largest_tile_id);
       cpi->superres_mode = AOM_SUPERRES_NONE;  // Reset to default (full-res).

diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index b55468b..429d78b 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h

@@ -2539,6 +2539,7 @@
   YV12_BUFFER_CONFIG copy_buffer;
   RATE_CONTROL rc;
   MV_STATS mv_stats;
+  FeatureFlags features;
 } CODING_CONTEXT;
 
 typedef struct {
@@ -3197,6 +3198,18 @@
    */
   int last_encoded_frame_order_hint;
 #endif  // CONFIG_PRIMARY_REF_FRAME_OPT
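+  /*!
+   * Frame width recorded by alloc_compressor_data(); reset to 0 on free.
+   */
+  int alloc_width;
+  /*!
+   * Frame height recorded by alloc_compressor_data(); reset to 0 on free.
+   */
+  int alloc_height;
+  /*!
+   * Superblock size recorded by alloc_compressor_data(); reset to 0 on free.
+   */
+  int alloc_sb_size;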
 } AV1_COMP;
 
 /*!

diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index bf3c91b..f4d298c 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h

@@ -55,7 +55,9 @@
 static AOM_INLINE void alloc_compressor_data(AV1_COMP *cpi) {
   AV1_COMMON *cm = &cpi->common;
   TokenInfo *token_info = &cpi->token_info;
-
+  cpi->alloc_width = cm->width;
+  cpi->alloc_height = cm->height;
+  cpi->alloc_sb_size = cm->sb_size;
   if (av1_alloc_context_buffers(cm, cm->width, cm->height)) {
     aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
                        "Failed to allocate context buffers");
@@ -288,6 +290,9 @@
     aom_free(cpi->consec_zero_mv);
     cpi->consec_zero_mv = NULL;
   }
+  cpi->alloc_width = 0;
+  cpi->alloc_height = 0;
+  cpi->alloc_sb_size = 0;
 }
 
 static AOM_INLINE void variance_partition_alloc(AV1_COMP *cpi) {

diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index 2ce4368..33d0e87 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c

@@ -765,7 +765,7 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 }
 
-static AOM_INLINE void reallocate_sb_size_dependent_buffers(AV1_COMP *cpi) {
+void reallocate_sb_size_dependent_buffers(AV1_COMP *cpi) {
   // Note: this is heavier than it needs to be. We can avoid reallocating some
   // of the buffers.
   AV1_COMMON *const cm = &cpi->common;
@@ -844,6 +844,9 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
   av1_set_tile_info(cm, &cpi->oxcf.tile_cfg);
   if (cm->sb_size != old_sb_size) {
+    // Reallocate sb_size-dependent buffers if the sb_size has changed.
+    reallocate_sb_size_dependent_buffers(cpi);
+  } else if (cpi->alloc_width < cm->width || cpi->alloc_height < cm->height) {
     av1_free_context_buffers(cm);
     av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf);
     av1_free_sms_tree(&cpi->td);
@@ -861,11 +864,6 @@
     }
   }
 
-  if (cm->seq_params.sb_size != old_sb_size) {
-    // Reallocate sb_size-dependent buffers if the sb_size has changed.
-    reallocate_sb_size_dependent_buffers(cpi);
-  }
-
   av1_zero(cm->cur_frame->interp_filter_selected);
 #if CONFIG_PRIMARY_REF_FRAME_OPT
   cm->prev_frame =
@@ -1317,6 +1315,7 @@
   cc->cdef_info = cm->cdef_info;
   cc->rc = cpi->rc;
   cc->mv_stats = cpi->mv_stats;
+  cc->features = cm->features;
 }
 
 void av1_save_all_coding_context(AV1_COMP *cpi) {

diff --git a/av1/encoder/encoder_utils.h b/av1/encoder/encoder_utils.h
index 7944bd3..e2022c2 100644
--- a/av1/encoder/encoder_utils.h
+++ b/av1/encoder/encoder_utils.h

@@ -1070,6 +1070,7 @@
   cm->cdef_info = cc->cdef_info;
   cpi->rc = cc->rc;
   cpi->mv_stats = cc->mv_stats;
+  cm->features = cc->features;
 }
 
 static AOM_INLINE int equal_dimensions_and_border(const YV12_BUFFER_CONFIG *a,
@@ -1264,6 +1265,8 @@
   }
   av1_calculate_tile_rows(cm, mi_params->mi_rows, tiles);
 }
+
+void reallocate_sb_size_dependent_buffers(AV1_COMP *cpi);
 #ifdef __cplusplus
 }  // extern "C"
 #endif

diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index fdd7ad5..5b39ff4 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c

@@ -3369,7 +3369,7 @@
       aom_highbd_comp_mask_upsampled_pred(
           xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h, subpel_x_q3,
           subpel_y_q3, ref, ref_stride, mask, mask_stride, invert_mask, xd->bd,
-          subpel_search_type);
+          subpel_search_type, is_scaled_ref);
     } else {
       if (get_cwp_idx(xd->mi[0]) != CWP_EQUAL) {
         DIST_WTD_COMP_PARAMS jcp_param;
@@ -3378,13 +3378,13 @@
         aom_highbd_dist_wtd_comp_avg_upsampled_pred(
             xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h,
             subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd, &jcp_param,
-            subpel_search_type);
+            subpel_search_type, is_scaled_ref);
       } else
 
         aom_highbd_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv,
                                            pred, second_pred, w, h, subpel_x_q3,
                                            subpel_y_q3, ref, ref_stride, xd->bd,
-                                           subpel_search_type);
+                                           subpel_search_type, is_scaled_ref);
     }
   } else {
     aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h,
@@ -5692,10 +5692,13 @@
   const int mi_col = xd->mi_col;
 
   unsigned int besterr;
+  const int is_scaled_ref = ms_buffers->src->width == ms_buffers->ref->width &&
+                            ms_buffers->src->height == ms_buffers->ref->height;
+
   DECLARE_ALIGNED(16, uint16_t, pred[MAX_SB_SQUARE]);
   aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h,
                             subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd,
-                            subpel_search_type, 0);
+                            subpel_search_type, is_scaled_ref);
   besterr = vfp->ovf(pred, w, wsrc, mask, sse);
 
   return besterr;

diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 48cf35f..70baf03 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c

@@ -801,7 +801,12 @@
     av1_enc_build_one_inter_predictor(second_pred, pw, &cur_mv[!id].as_mv,
                                       &inter_pred_params);
     // Do full-pixel compound motion search on the current reference frame.
-    if (id) xd->plane[plane].pre[0] = ref_yv12[id];
+    if (id) {
+      xd->plane[plane].pre[0] = ref_yv12[id];
+      const struct scale_factors *tmp_sf = xd->block_ref_scale_factors[0];
+      xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[id];
+      xd->block_ref_scale_factors[id] = tmp_sf;
+    }
 
 #if CONFIG_IBC_BV_IMPROVEMENT
     const int is_ibc_cost = 0;
@@ -887,7 +892,12 @@
     }
 
     // Restore the pointer to the first prediction buffer.
-    if (id) xd->plane[plane].pre[0] = ref_yv12[0];
+    if (id) {
+      xd->plane[plane].pre[0] = ref_yv12[0];
+      const struct scale_factors *tmp_sf = xd->block_ref_scale_factors[0];
+      xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[id];
+      xd->block_ref_scale_factors[id] = tmp_sf;
+    }
     if (bestsme < last_besterr[id]) {
       cur_mv[id] = best_mv;
       last_besterr[id] = bestsme;
@@ -933,6 +943,9 @@
   if (ref_idx) {
     orig_yv12 = pd->pre[0];
     pd->pre[0] = pd->pre[ref_idx];
+    const struct scale_factors *tmp_sf = xd->block_ref_scale_factors[0];
+    xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[ref_idx];
+    xd->block_ref_scale_factors[ref_idx] = tmp_sf;
   }
 
   if (scaled_ref_frame) {
@@ -972,7 +985,13 @@
     }
   }
   // Restore the pointer to the first unscaled prediction buffer.
-  if (ref_idx) pd->pre[0] = orig_yv12;
+  if (ref_idx) {
+    pd->pre[ref_idx] = pd->pre[0];
+    pd->pre[0] = orig_yv12;
+    const struct scale_factors *tmp_sf = xd->block_ref_scale_factors[0];
+    xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[ref_idx];
+    xd->block_ref_scale_factors[ref_idx] = tmp_sf;
+  }
 
   if (bestsme < INT_MAX) {
     *this_mv = best_mv.as_mv;
@@ -1006,7 +1025,10 @@
   const int num_planes = av1_num_planes(cm);
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = xd->mi[0];
-  const int ref = mbmi->ref_frame[ref_idx];
+  const MV_REFERENCE_FRAME refs[2] = { mbmi->ref_frame[0],
+                                       is_interintra_mode(mbmi)
+                                           ? INTRA_FRAME
+                                           : mbmi->ref_frame[1] };
   const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
   struct macroblockd_plane *const pd = &xd->plane[0];
   const MvCosts *mv_costs = &x->mv_costs;
@@ -1015,6 +1037,33 @@
   const int is_ibc_cost = 0;
 #endif
 
+  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
+  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
+    av1_get_scaled_ref_frame(cpi, refs[0]),
+    av1_get_scaled_ref_frame(cpi, refs[1])
+  };
+
+#if CONFIG_EXTENDED_WARP_PREDICTION
+  // Check that this is either an interinter or an interintra block
+  assert(has_second_ref(mbmi) || (ref_idx == 0 && is_interintra_mode(mbmi)));
+#endif  // CONFIG_EXTENDED_WARP_PREDICTION
+
+  for (int idx = 0; idx < 2; idx++) {
+    if (scaled_ref_frame[idx]) {
+      // Swap out the reference frame for a version that's been scaled to
+      // match the resolution of the current frame, allowing the existing
+      // full-pixel motion search code to be used without additional
+      // modifications.
+      for (int i = 0; i < num_planes; i++) {
+        backup_yv12[idx][i] = xd->plane[i].pre[idx];
+      }
+      const int mi_row = xd->mi_row;
+      const int mi_col = xd->mi_col;
+      av1_setup_pre_planes(xd, idx, scaled_ref_frame[idx], mi_row, mi_col, NULL,
+                           num_planes, &mbmi->chroma_ref_info);
+    }
+  }
+
   InterPredParams inter_pred_params;
   if (is_joint_mvd_coding_mode(mbmi->mode)) {
     const int pw = block_size_wide[bsize];
@@ -1027,38 +1076,18 @@
     struct buf_2d ref_yv12 = xd->plane[0].pre[!ref_idx];
     av1_init_inter_params(&inter_pred_params, pw, ph, mi_row * MI_SIZE,
                           mi_col * MI_SIZE, 0, 0, xd->bd, 0, &cm->sf_identity,
-                          &ref_yv12, EIGHTTAP_REGULAR);
+                          &ref_yv12, mbmi->interp_fltr);
     inter_pred_params.conv_params = get_conv_params(0, PLANE_TYPE_Y, xd->bd);
   }
 
-  struct buf_2d backup_yv12[MAX_MB_PLANE];
-  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
-      av1_get_scaled_ref_frame(cpi, ref);
-
-#if CONFIG_EXTENDED_WARP_PREDICTION
-  // Check that this is either an interinter or an interintra block
-  assert(has_second_ref(mbmi) || (ref_idx == 0 && is_interintra_mode(mbmi)));
-#endif  // CONFIG_EXTENDED_WARP_PREDICTION
-
-  // Store the first prediction buffer.
-  struct buf_2d orig_yv12;
   if (ref_idx) {
+    struct buf_2d orig_yv12;
     orig_yv12 = pd->pre[0];
     pd->pre[0] = pd->pre[ref_idx];
-  }
-
-  if (scaled_ref_frame) {
-    // Swap out the reference frame for a version that's been scaled to
-    // match the resolution of the current frame, allowing the existing
-    // full-pixel motion search code to be used without additional
-    // modifications.
-    for (int i = 0; i < num_planes; i++) {
-      backup_yv12[i] = xd->plane[i].pre[0];
-    }
-    const int mi_row = xd->mi_row;
-    const int mi_col = xd->mi_col;
-    av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
-                         num_planes, &mbmi->chroma_ref_info);
+    pd->pre[ref_idx] = orig_yv12;
+    const struct scale_factors *tmp_sf = xd->block_ref_scale_factors[0];
+    xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[ref_idx];
+    xd->block_ref_scale_factors[ref_idx] = tmp_sf;
   }
 
   int bestsme = INT_MAX;
@@ -1085,6 +1114,15 @@
 
     bestsme = adaptive_mvd_search(cm, xd, &ms_params, ref_mv.as_mv,
                                   &best_mv.as_mv, &dis, &sse);
+    for (int idx = 0; idx < 2; idx++) {
+      if (scaled_ref_frame[idx]) {
+        // Swap back the original buffers for subpel motion search
+        for (int i = 0; i < num_planes; i++) {
+          xd->plane[i].pre[idx] = backup_yv12[idx][i];
+        }
+      }
+    }
+
 #if CONFIG_VQ_MVD_CODING
     if (bestsme == INT_MAX) best_mv.as_int = INVALID_MV;
 #endif  // CONFIG_VQ_MVD_CODING
@@ -1109,6 +1147,14 @@
                                  &best_mv.as_mv, &dis, &sse, ref_idx, other_mv,
                                  &best_other_mv.as_mv, second_pred,
                                  &inter_pred_params, NULL);
+      for (int idx = 0; idx < 2; idx++) {
+        if (scaled_ref_frame[idx]) {
+          // Swap back the original buffers for subpel motion search
+          for (int i = 0; i < num_planes; i++) {
+            xd->plane[i].pre[idx] = backup_yv12[idx][i];
+          }
+        }
+      }
     }
   } else if (mbmi->mode == JOINT_AMVDNEWMV ||
              mbmi->mode == JOINT_AMVDNEWMV_OPTFLOW) {
@@ -1132,6 +1178,16 @@
                                            &best_mv.as_mv, &dis, &sse, ref_idx,
                                            other_mv, &best_other_mv.as_mv,
                                            second_pred, &inter_pred_params);
+
+    for (int idx = 0; idx < 2; idx++) {
+      if (scaled_ref_frame[idx]) {
+        // Swap back the original buffers for subpel motion search
+        for (int i = 0; i < num_planes; i++) {
+          xd->plane[i].pre[idx] = backup_yv12[idx][i];
+        }
+      }
+    }
+
 #if CONFIG_VQ_MVD_CODING
     if (bestsme == INT_MAX) {
       best_mv.as_int = INVALID_MV;
@@ -1179,10 +1235,12 @@
                                          &best_mv.as_fullmv);
     }
 
-    if (scaled_ref_frame) {
-      // Swap back the original buffers for subpel motion search.
-      for (int i = 0; i < num_planes; i++) {
-        xd->plane[i].pre[0] = backup_yv12[i];
+    for (int idx = 0; idx < 2; idx++) {
+      if (scaled_ref_frame[idx]) {
+        // Swap back the original buffers for subpel motion search
+        for (int i = 0; i < num_planes; i++) {
+          xd->plane[i].pre[idx] = backup_yv12[idx][i];
+        }
       }
     }
 
@@ -1223,7 +1281,14 @@
   }
 
   // Restore the pointer to the first unscaled prediction buffer.
-  if (ref_idx) pd->pre[0] = orig_yv12;
+  if (ref_idx) {
+    struct buf_2d orig_yv12 = pd->pre[0];
+    pd->pre[0] = pd->pre[ref_idx];
+    pd->pre[ref_idx] = orig_yv12;
+    const struct scale_factors *tmp_sf = xd->block_ref_scale_factors[0];
+    xd->block_ref_scale_factors[0] = xd->block_ref_scale_factors[ref_idx];
+    xd->block_ref_scale_factors[ref_idx] = tmp_sf;
+  }
 
   if (bestsme < INT_MAX
 #if CONFIG_VQ_MVD_CODING

diff --git a/av1/encoder/ratectrl.c b/av1/encoder/ratectrl.c
index 72ce6dd..268611c 100644
--- a/av1/encoder/ratectrl.c
+++ b/av1/encoder/ratectrl.c

@@ -992,6 +992,9 @@
     if ((superres_mode == AOM_SUPERRES_QTHRESH ||
          superres_mode == AOM_SUPERRES_AUTO) &&
         superres_denom != SCALE_NUMERATOR) {
+#if ADJUST_SUPER_RES_Q
+      active_qp = rc_cfg->qp;
+#else
       int mult = SUPERRES_QADJ_PER_DENOM_KEYFRAME_SOLO;
       if (intra_only && rc->frames_to_key <= 1) {
         mult = 0;
@@ -1002,6 +1005,7 @@
       }
       active_qp =
           AOMMAX(active_qp - ((superres_denom - SCALE_NUMERATOR) * mult), 0);
+#endif  // ADJUST_SUPER_RES_Q
     }
   }
   if (rc_cfg->mode == AOM_CQ && rc->total_target_bits > 0) {
@@ -1687,6 +1691,24 @@
     q = rc_pick_q_and_bounds(cpi, width, height, gf_index, bottom_index,
                              top_index, &rc->level1_qp);
   }
+
+#if ADJUST_SUPER_RES_Q
+  // The maximum horizontal downscaling factor is 2x, for which
+  // superres_scale_denominator is 16.
+  // The qindex is assumed to be reduced by 23 at 2x downscaling; the value
+  // 23 was found experimentally and may not be optimal.
+  // Taking 23 as the maximum, the qindex is reduced by
+  // (23 * log2(superres_scale_denominator / 8)).
+  if (cpi->superres_mode == AOM_SUPERRES_AUTO &&
+      cpi->common.superres_scale_denominator != SCALE_NUMERATOR) {
+    q = AOMMAX(
+        q - ((int)(log2(((double)cpi->common.superres_scale_denominator) /
+                        SCALE_NUMERATOR) *
+                   23)),
+        0);
+  }
+#endif  // ADJUST_SUPER_RES_Q
+
   if (gf_group->update_type[gf_index] == ARF_UPDATE ||
       gf_group->update_type[gf_index] == KFFLT_UPDATE)
     rc->level1_qp = q;

diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 347772e..306c66e 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c

@@ -2088,7 +2088,10 @@
     return NULL;
   }
 
-  assert(ref_frame < cpi->common.ref_frames_info.num_total_refs);
+  if (ref_frame >= cpi->common.ref_frames_info.num_total_refs) {
+    return NULL;
+  }
+
   RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame];
   const RefCntBuffer *const ref_buf =
       get_ref_frame_buf(&cpi->common, ref_frame);

diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 9df3129..6e7fed6 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c

@@ -812,7 +812,17 @@
   }
 
 #if CONFIG_SKIP_MODE_ENHANCEMENT
-  if (mbmi->skip_mode) return;
+  if (mbmi->skip_mode) {
+    // Go back to unscaled reference.
+    if (scaled_ref_frame) {
+      // The pred block was temporarily set up from the scaled reference above;
+      // go back to the unscaled reference now, for subsequent use.
+      av1_setup_pred_block(xd, yv12_mb[ref_frame_idx], yv12, sf, sf,
+                           num_planes);
+    }
+
+    return;
+  }
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
   // Gets an initial list of candidate vectors from neighbours and orders them

diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 99e0ed3..1591b5b 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c

@@ -384,7 +384,7 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
   sf->rd_sf.perform_coeff_opt = 1;
-  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;
+  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_ALL;
 
   if (speed >= 1) {
     sf->inter_sf.selective_ref_frame = 2;

diff --git a/test/comp_avg_pred_test.h b/test/comp_avg_pred_test.h
index 50c1c5f..b6cf0f5 100644
--- a/test/comp_avg_pred_test.h
+++ b/test/comp_avg_pred_test.h

@@ -43,7 +43,8 @@
     MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
     const MV *const mv, uint8_t *comp_pred, const uint8_t *pred, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
-    int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search);
+    int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param, int subpel_search,
+    int is_scaled_ref);
 
 typedef std::tuple<distwtdcompavg_func, BLOCK_SIZE> DISTWTDCOMPAVGParam;
 
@@ -55,7 +56,7 @@
     const MV *const mv, uint16_t *comp_pred8, const uint16_t *pred8, int width,
     int height, int subpel_x_q3, int subpel_y_q3, const uint16_t *ref8,
     int ref_stride, int bd, const DIST_WTD_COMP_PARAMS *jcp_param,
-    int subpel_search);
+    int subpel_search, int is_scaled_ref);
 
 typedef std::tuple<int, highbddistwtdcompavgupsampled_func, BLOCK_SIZE>
     HighbdDISTWTDCOMPAVGUPSAMPLEDParam;
@@ -238,11 +239,11 @@
                   NULL, NULL, 0, 0, NULL, output,
                   pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
                   sub_y_q3, ref8 + offset_r * w + offset_c, in_w, bd,
-                  &dist_wtd_comp_params, subpel_search);
+                  &dist_wtd_comp_params, subpel_search, 0);
               test_impl(NULL, NULL, 0, 0, NULL, output2,
                         pred8 + offset_r * w + offset_c, in_w, in_h, sub_x_q3,
                         sub_y_q3, ref8 + offset_r * w + offset_c, in_w, bd,
-                        &dist_wtd_comp_params, subpel_search);
+                        &dist_wtd_comp_params, subpel_search, 0);
 
               for (int i = 0; i < in_h; ++i) {
                 for (int j = 0; j < in_w; ++j) {
@@ -292,7 +293,7 @@
     for (int i = 0; i < num_loops; ++i)
       aom_highbd_dist_wtd_comp_avg_upsampled_pred_c(
           NULL, NULL, 0, 0, NULL, output, pred8, in_w, in_h, sub_x_q3, sub_y_q3,
-          ref8, in_w, bd, &dist_wtd_comp_params, subpel_search);
+          ref8, in_w, bd, &dist_wtd_comp_params, subpel_search, 0);
 
     aom_usec_timer_mark(&timer);
     const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
@@ -304,7 +305,8 @@
 
     for (int i = 0; i < num_loops; ++i)
       test_impl(NULL, NULL, 0, 0, NULL, output2, pred8, in_w, in_h, sub_x_q3,
-                sub_y_q3, ref8, in_w, bd, &dist_wtd_comp_params, subpel_search);
+                sub_y_q3, ref8, in_w, bd, &dist_wtd_comp_params, subpel_search,
+                0);
 
     aom_usec_timer_mark(&timer1);
     const int elapsed_time1 = static_cast<int>(aom_usec_timer_elapsed(&timer1));

diff --git a/test/comp_mask_variance_test.cc b/test/comp_mask_variance_test.cc
index db9b995..33797e8 100644
--- a/test/comp_mask_variance_test.cc
+++ b/test/comp_mask_variance_test.cc

@@ -292,7 +292,7 @@
     for (int j = 0; j < num_loops; ++j) {
       aom_highbd_comp_mask_upsampled_pred(
           NULL, NULL, 0, 0, NULL, comp_pred1_, pred_, w, h, subx, suby, ref_,
-          MAX_SB_SIZE, mask, w, 0, bd_, subpel_search);
+          MAX_SB_SIZE, mask, w, 0, bd_, subpel_search, 0);
     }
     aom_usec_timer_mark(&timer);
     double time = static_cast<double>(aom_usec_timer_elapsed(&timer));